Mr7Explorer committed on
Commit
b8b02b5
·
verified ·
1 Parent(s): d2789be

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +261 -861
app.py CHANGED
@@ -1,5 +1,5 @@
1
  # ============================================================
2
- # app.py (Updated Full Version — Chunk 1: Lines 1–300)
3
  # ============================================================
4
 
5
  import gradio as gr
@@ -24,10 +24,11 @@ except ImportError:
24
  LOUDNESS_AVAILABLE = False
25
 
26
 
27
- # ==================== ANALYSIS FUNCTIONS ====================
 
 
28
 
29
  def read_audio_info(path):
30
- """Read audio file metadata"""
31
  info = sf.info(path)
32
  return {
33
  "samplerate": int(info.samplerate),
@@ -39,15 +40,16 @@ def read_audio_info(path):
39
  }
40
 
41
 
 
 
 
 
42
  def compute_time_domain_stats(y):
43
- """Calculate time-domain statistics"""
44
  peak = float(np.max(np.abs(y)))
45
  rms = float(np.sqrt(np.mean(y ** 2)))
46
-
47
  peak_db = 20 * np.log10(max(peak, 1e-12))
48
  rms_db = 20 * np.log10(max(rms, 1e-12))
49
  crest_factor = peak_db - rms_db
50
-
51
  abs_y = np.abs(y)
52
  noise_floor = float(np.percentile(abs_y, 10))
53
  snr_est = 20 * np.log10(max(rms, 1e-12) / max(noise_floor, 1e-12))
@@ -66,89 +68,68 @@ def compute_time_domain_stats(y):
66
 
67
 
68
  # ============================================================
69
- # UPDATED SPECTRAL ANALYSIS FUNCTION (FFT=4096, 90th percentile)
70
  # ============================================================
71
 
72
  def compute_spectral_analysis(y, sr, n_fft=4096):
73
- """Comprehensive spectral analysis tuned for speech QC"""
74
-
75
- hop_length = n_fft // 4
76
-
77
- # STFT
78
- S = np.abs(librosa.stft(y, n_fft=n_fft, hop_length=hop_length, window="hann"))
79
  freqs = np.linspace(0, sr / 2, S.shape[0])
80
-
81
- # Convert amplitude to dB
82
  S_db = librosa.amplitude_to_db(S, ref=np.max)
83
 
84
- # ===== UPDATED ENERGY ESTIMATE: 90th percentile of power =====
85
  S_power = S ** 2
86
  energy = np.percentile(S_power, 90, axis=1) + 1e-20
87
  total_energy = float(np.sum(energy))
88
  cum_energy = np.cumsum(energy)
89
 
90
- # Rolloffs
91
- roll85_idx = np.searchsorted(cum_energy, 0.85 * total_energy)
92
- roll95_idx = np.searchsorted(cum_energy, 0.95 * total_energy)
93
-
94
- freq_at_85 = float(freqs[min(roll85_idx, len(freqs) - 1)])
95
- freq_at_95 = float(freqs[min(roll95_idx, len(freqs) - 1)])
96
 
97
- # ===== UPDATED HF ENVELOPE: 90th percentile of dB =====
98
- mean_db_per_bin = np.percentile(S_db, 90, axis=1)
99
 
100
- peak_db = float(np.max(S_db))
101
- threshold_db = peak_db - 60
 
 
 
102
 
103
- non_silent_bins = np.where(mean_db_per_bin > threshold_db)[0]
104
- highest_freq = float(freqs[non_silent_bins[-1]]) if non_silent_bins.size else 0.0
105
-
106
- # ===================== UPDATED SPEECH-CENTRIC BANDS =====================
107
- def band_energy(low, high):
108
  i1 = np.searchsorted(freqs, low)
109
  i2 = np.searchsorted(freqs, high)
110
  return float(100 * np.sum(energy[i1:i2]) / total_energy)
111
 
112
- def band_energy_above(f):
113
  idx = np.searchsorted(freqs, f)
114
  return float(100 * np.sum(energy[idx:]) / total_energy)
115
 
116
  energy_stats = {
117
- "below_100hz": band_energy(0, 100),
118
- "100_500hz": band_energy(100, 500),
119
- "500_2khz": band_energy(500, 2000),
120
- "2k_8khz": band_energy(2000, 8000),
121
- "8k_12khz": band_energy(8000, 12000),
122
- "12k_16khz": band_energy(12000, 16000),
123
- "above_16khz": band_energy_above(16000)
124
  }
125
 
126
- # Brickwall detection
127
- diffs = np.diff(mean_db_per_bin)
128
- big_drop_idx = np.where(diffs < -20)[0]
129
- brick_wall = bool(big_drop_idx.size)
130
- brick_freq = float(freqs[big_drop_idx[0]]) if big_drop_idx.size else None
131
 
132
- # Spectral notches
133
- smooth = sps.medfilt(mean_db_per_bin, kernel_size=9)
134
  minima = sps.argrelextrema(smooth, np.less)[0]
135
  notches = []
136
-
137
  for m in minima:
138
  left = smooth[max(0, m - 6):m]
139
- right = smooth[m + 1:min(len(smooth), m + 7)]
140
- neighbor_peak = max(
141
- left.max() if left.size else -999,
142
- right.max() if right.size else -999
143
- )
144
- depth = neighbor_peak - smooth[m]
145
  if depth >= 15 and freqs[m] > 100:
146
- notches.append({
147
- "freq": float(freqs[m]),
148
- "depth_db": float(depth)
149
- })
150
 
151
- # Additional spectral stats
152
  centroid = float(np.mean(librosa.feature.spectral_centroid(S=S, sr=sr)))
153
  bandwidth = float(np.mean(librosa.feature.spectral_bandwidth(S=S, sr=sr)))
154
  flatness = float(np.mean(librosa.feature.spectral_flatness(S=S)))
@@ -157,13 +138,12 @@ def compute_spectral_analysis(y, sr, n_fft=4096):
157
  return {
158
  "S_db": S_db,
159
  "freqs": freqs,
160
- "hop_length": hop_length,
161
- "n_fft": n_fft,
162
- "rolloff_85pct": freq_at_85,
163
- "rolloff_95pct": freq_at_95,
164
  "highest_freq_minus60db": highest_freq,
165
  "energy_distribution": energy_stats,
166
- "brick_wall_detected": brick_wall,
167
  "brick_wall_freq": brick_freq,
168
  "spectral_notches": notches,
169
  "spectral_centroid": centroid,
@@ -171,927 +151,347 @@ def compute_spectral_analysis(y, sr, n_fft=4096):
171
  "spectral_flatness": flatness,
172
  "spectral_rolloff": rolloff
173
  }
174
- def compute_loudness(y, sr):
175
- """Compute integrated loudness (LUFS) using pyloudnorm."""
176
- if not LOUDNESS_AVAILABLE:
177
- return None
 
 
 
178
  try:
179
- meter = pyln.Meter(sr)
180
- loudness = float(meter.integrated_loudness(y))
181
- return loudness
182
- except Exception:
183
- return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
184
 
185
  # ============================================================
186
- # ADVANCED ISSUE DETECTION ENGINE
187
- # Includes: HF-loss logic, LPF detector, HPF detector,
188
- # NR artifacts, spectral anomalies, compression, clipping
189
  # ============================================================
190
 
191
  def detect_audio_issues(spectral, time_stats):
192
- """Detect audio processing artifacts with advanced forensic analysis."""
193
-
194
  issues = []
195
  energy = spectral["energy_distribution"]
196
  freqs = spectral["freqs"]
197
- hf_env = spectral.get("hf_env", None)
198
- lf_env = spectral.get("lf_env", None)
199
- flatness = spectral.get("spectral_flatness", None)
200
- notches = spectral.get("spectral_notches", [])
201
-
202
- # ============================================================
203
- # 1️⃣ HF LOSS LOGIC (Speech-safe Thresholds)
204
- # ============================================================
205
-
206
  hf_8_12 = energy["8k_12khz"]
207
- highest_freq = spectral["highest_freq_minus60db"]
208
-
209
- # Severe HF cutoff → Real LPF or aggressive NR
210
- if hf_8_12 < 0.01 and highest_freq < 9000:
211
- issues.append((
212
- "HF_LOSS", "HIGH",
213
- f"Severe HF cutoff: {hf_8_12:.3f}% in 8–12k and rolloff at {highest_freq:.1f} Hz."
214
- ))
215
- # Mild HF weakness → Normal for speech
216
  elif hf_8_12 < 0.02:
217
- issues.append((
218
- "HF_LOSS", "LOW",
219
- f"Low HF energy ({hf_8_12:.3f}%). Normal for speech."
220
- ))
221
-
222
- # ============================================================
223
- # 2️⃣ LPF DETECTOR (Low-pass filter)
224
- # ============================================================
225
-
226
- if hf_env is not None:
227
- hf_region = (freqs >= 5000) & (freqs <= 12000)
228
- hf_vals = hf_env[hf_region]
229
- hf_freq = freqs[hf_region]
230
-
231
- if len(hf_vals) > 10:
232
- coef = np.polyfit(hf_freq, hf_vals, 1)
233
- slope_per_hz = coef[0]
234
- slope_db_oct = slope_per_hz * np.log2(2) * 12000
235
-
236
- # Hard LPF cutoff
237
- if highest_freq < 10000:
238
- issues.append((
239
- "LPF_DETECTED", "HIGH",
240
- f"Low-pass filter near {highest_freq:.0f} Hz."
241
- ))
242
-
243
- # Soft HF tilt (EQ shelf)
244
- elif slope_db_oct < -6:
245
- issues.append((
246
- "HF_EQ_SHELF", "LOW",
247
- f"HF rolloff detected (~{slope_db_oct:.1f} dB/oct)."
248
- ))
249
-
250
- # ============================================================
251
- # 3️⃣ HPF DETECTOR (High-pass filter)
252
- # ============================================================
253
-
254
- if lf_env is not None:
255
- low_region = (freqs >= 20) & (freqs <= 300)
256
- lf_vals = lf_env[low_region]
257
- lf_freq = freqs[low_region]
258
-
259
- if len(lf_vals) > 10:
260
- coef_l = np.polyfit(lf_freq, lf_vals, 1)
261
- slope_l = coef_l[0]
262
- slope_db_oct_l = slope_l * np.log2(2) * 300
263
-
264
- if energy["below_100hz"] < 0.5:
265
- if slope_db_oct_l > 6:
266
- issues.append((
267
- "HPF_DETECTED", "HIGH",
268
- f"High-pass filter detected (~{slope_db_oct_l:.1f} dB/oct)."
269
- ))
270
- else:
271
- issues.append((
272
- "HPF_SUSPECTED", "LOW",
273
- f"Possible mild HPF (LF rolloff)."
274
- ))
275
-
276
- # ============================================================
277
- # 4️⃣ Noise Reduction Artifact Detector
278
- # ============================================================
279
-
280
- if flatness is not None:
281
- hf_flat = np.mean(flatness[-20:]) # Flattening in top HF region
282
-
283
- # Strong NR → metallic artifacts, HF flattening + notches
284
- if hf_flat > 0.40 and len(notches) >= 3:
285
- issues.append((
286
- "NOISE_REDUCTION_ARTIFACTS", "HIGH",
287
- f"NR artifacts: HF flattening ({hf_flat:.2f}) + {len(notches)} notches."
288
- ))
289
-
290
- # Mild NR
291
- elif hf_flat > 0.35:
292
- issues.append((
293
- "NR_SOFT", "LOW",
294
- f"Mild noise reduction detected (HF flattening={hf_flat:.2f})."
295
- ))
296
-
297
- # ============================================================
298
- # 5️⃣ Spectral Notches (Resonance Removal / NR)
299
- # ============================================================
300
-
301
- if len(notches) > 0:
302
- issues.append((
303
- "SPECTRAL_NOTCHES", "MEDIUM",
304
- f"{len(notches)} spectral notches detected."
305
- ))
306
-
307
- # ============================================================
308
- # 6️⃣ Brick-wall LPF (from original code)
309
- # ============================================================
310
 
311
  if spectral["brick_wall_detected"]:
312
- issues.append((
313
- "BRICK_WALL", "HIGH",
314
- f"Brick-wall behavior at {spectral['brick_wall_freq']:.0f} Hz."
315
- ))
316
 
317
- # ============================================================
318
- # 7️⃣ Compression / Dynamics
319
- # ============================================================
 
320
 
321
- crest = time_stats["crest_factor_db"]
 
 
322
 
 
323
  if crest < 3:
324
- issues.append((
325
- "OVER_COMPRESSION", "HIGH",
326
- f"Very low crest factor ({crest:.1f} dB)."
327
- ))
328
  elif crest < 6:
329
- issues.append((
330
- "COMPRESSION", "MEDIUM",
331
- f"Moderate compression ({crest:.1f} dB)."
332
- ))
333
-
334
- # ============================================================
335
- # 8️⃣ Clipping
336
- # ============================================================
337
 
338
  if time_stats["peak"] >= 0.999:
339
- issues.append((
340
- "CLIPPING", "CRITICAL",
341
- f"Peak amplitude {time_stats['peak']:.6f}. Possible clipping."
342
- ))
343
- # ============================================================
344
- # 9️⃣ DE-ESSER DETECTOR (HF transient suppression)
345
- # ============================================================
346
-
347
- # Presence & sibilance bands
348
- band_3_6k = (freqs >= 3000) & (freqs <= 6000)
349
- band_6_10k = (freqs >= 6000) & (freqs <= 10000)
350
-
351
- if hf_env is not None:
352
- presence_energy = np.mean(hf_env[band_3_6k])
353
- sibilance_energy = np.mean(hf_env[band_6_10k])
354
-
355
- # Ratio of presence energy to sibilance energy
356
- if sibilance_energy < (presence_energy * 0.20):
357
- issues.append((
358
- "DE_ESSER_DETECTED", "MEDIUM",
359
- "Sibilance band (6–10 kHz) strongly reduced relative to presence band (3–6 kHz). Possible de-essing."
360
- ))
361
- # ============================================================
362
- # 🔟 MULTIBAND COMPRESSION DETECTOR
363
- # ============================================================
364
-
365
- lf_band = (freqs >= 80) & (freqs <= 300)
366
- mf_band = (freqs >= 300) & (freqs <= 3000)
367
- hf_band = (freqs >= 3000) & (freqs <= 8000)
368
-
369
- def band_crest(env, band):
370
- vals = env[band]
371
- if len(vals) == 0:
372
- return None
373
- return np.max(vals) - np.mean(vals)
374
-
375
- if hf_env is not None:
376
- cf_lf = band_crest(hf_env, lf_band)
377
- cf_mf = band_crest(hf_env, mf_band)
378
- cf_hf = band_crest(hf_env, hf_band)
379
-
380
- # Compression fingerprint: MF and HF crest factor collapse
381
- if cf_mf is not None and cf_hf is not None and cf_lf is not None:
382
-
383
- # Heavy multiband compression signature
384
- if cf_hf < (cf_lf * 0.4):
385
- issues.append((
386
- "MULTIBAND_COMPRESSION", "MEDIUM",
387
- "HF crest factor significantly lower than LF. Possible multiband compression."
388
- ))
389
-
390
- if cf_mf < (cf_lf * 0.5):
391
- issues.append((
392
- "MULTIBAND_COMPRESSION", "LOW",
393
- "Mid-band crest factor unusually compressed vs LF."
394
- ))
395
- # ============================================================
396
- # 1️⃣1️⃣ EQ CURVE CLASSIFIER
397
- # ============================================================
398
-
399
- if hf_env is not None:
400
- # Smooth envelope for stability
401
- smooth = sps.medfilt(hf_env, kernel_size=9)
402
-
403
- # Evaluate global tilt (HF slope)
404
- coef_eq = np.polyfit(freqs, smooth, 1)
405
- tilt = coef_eq[0]
406
-
407
- # Check curvature — identifies shelves and peaking EQ
408
- curvature = np.polyfit(freqs, smooth, 2)[0]
409
-
410
- # Detect HF shelf boost
411
- if tilt > 0.00002:
412
- issues.append((
413
- "EQ_HF_BOOST", "LOW",
414
- "HF shelf boost detected (positive spectral tilt)."
415
- ))
416
-
417
- # Detect HF shelf cut
418
- elif tilt < -0.00002:
419
- issues.append((
420
- "EQ_HF_CUT", "LOW",
421
- "HF shelf cut detected (negative spectral tilt)."
422
- ))
423
-
424
- # Detect midrange peaking EQ
425
- if curvature > 1e-12:
426
- issues.append((
427
- "EQ_PEAKING", "LOW",
428
- "Spectral curvature indicates possible midrange peaking EQ."
429
- ))
430
-
431
- # Detect tilt EQ
432
- if abs(tilt) > 0.00001 and abs(curvature) < 1e-12:
433
- issues.append((
434
- "EQ_TILT", "LOW",
435
- "Tilt EQ detected (linear upward/downward spectral tilt)."
436
- ))
437
-
438
- # ============================================================
439
- # Final return
440
- # ============================================================
441
 
442
  return issues
443
 
 
444
  # ============================================================
445
- # REPORT GENERATION
446
  # ============================================================
447
 
448
- def create_report(audio_data, output_path):
449
- """Create comprehensive PNG report"""
450
-
451
  plt.style.use("default")
452
-
453
- # UPDATED FIGURE SIZE
454
  fig = plt.figure(figsize=(22, 16))
455
  fig.patch.set_facecolor("white")
456
 
457
  fig.suptitle(
458
- f"AUDIO FORENSIC ANALYSIS REPORT\n{audio_data['filename']}",
459
- fontsize=20,
460
- fontweight="bold",
461
- y=0.97
462
  )
463
 
464
  gs = gridspec.GridSpec(
465
- 4, 4,
466
- figure=fig,
467
- hspace=0.4,
468
- wspace=0.4,
469
- height_ratios=[1.5, 1, 0.8, 0.9],
470
- left=0.05,
471
- right=0.95,
472
- top=0.92,
473
- bottom=0.05
474
  )
475
 
476
- # ============================
477
- # SPECTROGRAM PLOT (UPDATED)
478
- # ============================
479
-
480
- ax_spec = fig.add_subplot(gs[0, :])
481
-
482
- S_db = audio_data["spectral"]["S_db"]
483
- sr = audio_data["info"]["samplerate"]
484
- hop = audio_data["spectral"]["hop_length"]
485
 
486
  img = librosa.display.specshow(
487
- S_db,
488
- sr=sr,
489
- hop_length=hop,
490
- y_axis="hz",
491
- x_axis="time",
492
- cmap="viridis",
493
- ax=ax_spec,
494
- vmin=-80,
495
- vmax=0
496
  )
 
 
497
 
498
- ax_spec.set_title("Spectrogram", fontsize=14, fontweight="bold", pad=10)
499
- ax_spec.set_ylabel("Frequency (Hz)", fontsize=11, fontweight="bold")
500
- ax_spec.set_xlabel("Time (seconds)", fontsize=11, fontweight="bold")
501
- ax_spec.grid(True, alpha=0.3, linestyle="--", linewidth=0.5)
502
-
503
- cbar = plt.colorbar(img, ax=ax_spec, format="%+2.0f dB", pad=0.01)
504
- cbar.ax.tick_params(labelsize=10)
505
- cbar.set_label("Magnitude (dB)", fontsize=10, fontweight="bold")
506
-
507
- # ============================
508
- # FILE INFO BLOCK
509
- # ============================
510
-
511
- ax_info = fig.add_subplot(gs[1, 0:2])
512
- ax_info.axis("off")
513
 
514
- info = audio_data["info"]
515
- time = audio_data["time_stats"]
516
 
517
- info_lines = [
518
  "FILE INFORMATION",
519
- "─" * 50,
520
- f"Sample Rate: {info['samplerate']:,} Hz",
521
- f"Channels: {info['channels']}",
522
- f"Duration: {info['duration']:.2f} sec",
523
- f"Format: {info['format']} ({info['subtype']})",
524
- f"Total Frames: {info['frames']:,}",
525
  "",
526
- "TIME-DOMAIN ANALYSIS",
527
- "─" * 50,
528
- f"Peak Level: {time['peak_db']:.2f} dBFS ({time['peak']:.6f})",
529
- f"RMS Level: {time['rms_db']:.2f} dBFS ({time['rms']:.6f})",
530
- f"Crest Factor: {time['crest_factor_db']:.2f} dB",
531
- f"Noise Floor: {time['noise_floor']:.6f}",
532
- f"Est. SNR: {time['snr_db']:.1f} dB",
533
- f"Zero Cross Rate: {time['zero_crossing_rate']:.4f}",
534
  ]
535
 
536
- if audio_data.get("lufs") is not None:
537
- info_lines.extend([
538
- "",
539
- "LOUDNESS (BS.1770)",
540
- "─" * 50,
541
- f"Integrated LUFS: {audio_data['lufs']:.2f} LUFS"
542
- ])
543
-
544
- info_text = "\n".join(info_lines)
545
-
546
- ax_info.text(
547
- 0.05, 0.95, info_text,
548
- transform=ax_info.transAxes,
549
- fontsize=11,
550
- verticalalignment="top",
551
- family="monospace",
552
- bbox=dict(
553
- boxstyle="round,pad=1",
554
- facecolor="#E8F4F8",
555
- edgecolor="#0077BE",
556
- linewidth=2
557
- )
558
- )
559
- # ============================
560
- # SPECTRAL STATS PANEL
561
- # ============================
562
 
563
- ax_spectral = fig.add_subplot(gs[1, 2:4])
564
- ax_spectral.axis("off")
 
565
 
566
- spec = audio_data["spectral"]
567
- energy = spec["energy_distribution"]
 
 
 
568
 
569
- spectral_lines = [
570
  "SPECTRAL ANALYSIS",
571
- "─" * 50,
572
- f"Centroid: {spec['spectral_centroid']:.1f} Hz",
573
- f"Bandwidth: {spec['spectral_bandwidth']:.1f} Hz",
574
- f"Flatness: {spec['spectral_flatness']:.4f}",
575
- f"Rolloff: {spec['spectral_rolloff']:.1f} Hz",
 
576
  "",
577
- "FREQUENCY ROLLOFF POINTS",
578
- "" * 50,
579
- f"85% Energy: {spec['rolloff_85pct']:.1f} Hz",
580
- f"95% Energy: {spec['rolloff_95pct']:.1f} Hz",
581
- f"Highest (-60dB): {spec['highest_freq_minus60db']:.1f} Hz",
582
- "",
583
- "ENERGY DISTRIBUTION (Speech Bands)",
584
- "─" * 50,
585
- f"< 100 Hz: {energy['below_100hz']:.2f}%",
586
- f"100–500 Hz: {energy['100_500hz']:.2f}%",
587
- f"500–2k Hz: {energy['500_2khz']:.2f}%",
588
- f"2k–8k Hz: {energy['2k_8khz']:.2f}%",
589
- f"8k–12k Hz: {energy['8k_12khz']:.2f}%",
590
- f"12k–16k Hz: {energy['12k_16khz']:.2f}%",
591
- f"> 16k Hz: {energy['above_16khz']:.2f}%",
592
- ]
593
-
594
- spectral_text = "\n".join(spectral_lines)
595
-
596
- ax_spectral.text(
597
- 0.05, 0.95, spectral_text,
598
- transform=ax_spectral.transAxes,
599
- fontsize=11,
600
- verticalalignment="top",
601
- family="monospace",
602
- bbox=dict(
603
- boxstyle="round,pad=1",
604
- facecolor="#FFF4E6",
605
- edgecolor="#FF8C00",
606
- linewidth=2
607
- )
608
- )
609
-
610
-
611
- # ============================
612
- # ENERGY DISTRIBUTION BAR CHART
613
- # ============================
614
-
615
- ax_energy = fig.add_subplot(gs[2, :])
616
-
617
- bands = [
618
- "<100Hz",
619
- "100–500Hz",
620
- "500–2kHz",
621
- "2k–8kHz",
622
- "8k–12kHz",
623
- "12k–16kHz",
624
- ">16kHz"
625
- ]
626
-
627
- values = [
628
- energy["below_100hz"],
629
- energy["100_500hz"],
630
- energy["500_2khz"],
631
- energy["2k_8khz"],
632
- energy["8k_12khz"],
633
- energy["12k_16khz"],
634
- energy["above_16khz"]
635
  ]
636
 
637
- colors = [
638
- "#2C3E50",
639
- "#E74C3C",
640
- "#E67E22",
641
- "#F39C12",
642
- "#2ECC71",
643
- "#3498DB",
644
- "#9B59B6"
645
- ]
646
-
647
- bars = ax_energy.bar(
648
- bands, values,
649
- color=colors,
650
- edgecolor="black",
651
- linewidth=1.5,
652
- alpha=0.85
653
- )
654
 
655
- ax_energy.set_ylabel("Energy Percentage (%)", fontsize=12, fontweight="bold")
656
- ax_energy.set_title("Frequency Band Energy Distribution", fontsize=13, fontweight="bold", pad=10)
657
- ax_energy.grid(axis="y", alpha=0.4, linestyle="--", linewidth=0.8)
658
- ax_energy.set_ylim(0, max(values) * 1.15 if max(values) > 0 else 1)
659
- ax_energy.set_axisbelow(True)
660
-
661
- for bar, val in zip(bars, values):
662
- height = bar.get_height()
663
- ax_energy.text(
664
- bar.get_x() + bar.get_width() / 2., height + 0.5,
665
- f"{val:.2f}%",
666
- ha="center",
667
- va="bottom",
668
- fontsize=10,
669
- fontweight="bold"
670
- )
671
 
 
 
672
 
673
- # ============================
674
- # ISSUES PANEL (UPDATED)
675
- # ============================
676
-
677
- ax_issues = fig.add_subplot(gs[3, 0:3])
678
- ax_issues.axis("off")
679
 
680
- issues = audio_data["issues"]
 
 
681
 
682
- issue_lines = [
683
- "DETECTED ISSUES & WARNINGS",
684
- "" * 80
685
- ]
686
 
687
- # No issues
688
- if not issues:
689
- issue_lines.append("✅ No significant issues detected.")
690
 
691
- else:
692
- # Updated severity mapping
693
- severity_icons = {
694
- "CRITICAL": "🔴 CRITICAL",
695
- "HIGH": "🟠 HIGH",
696
- "MEDIUM": "🟡 MEDIUM",
697
- "LOW": "🟢 LOW"
698
- }
699
 
700
- # Dynamic issue listing (supports all new detectors)
701
- for issue_type, severity, description in issues:
702
- icon = severity_icons.get(severity, "⚪ INFO")
703
- issue_lines.append(f"\n{icon} — {issue_type}")
704
- issue_lines.append(f" → {description}")
705
-
706
- # ============================
707
- # SPECTRAL NOTCH DETAILS
708
- # ============================
709
-
710
- if spec["spectral_notches"]:
711
- issue_lines.append("\n🎵 SPECTRAL NOTCHES DETECTED:")
712
- issue_lines.append(f" Total: {len(spec['spectral_notches'])}")
713
-
714
- for i, notch in enumerate(spec["spectral_notches"][:5], start=1):
715
- issue_lines.append(
716
- f" {i}. {notch['freq']:.1f} Hz (Depth: {notch['depth_db']:.1f} dB)"
717
- )
718
-
719
- if len(spec["spectral_notches"]) > 5:
720
- issue_lines.append(
721
- f" ... and {len(spec['spectral_notches']) - 5} more notches"
722
- )
723
-
724
- # ============================
725
- # BRICK-WALL FILTER NOTICE
726
- # ============================
727
-
728
- if spec["brick_wall_detected"]:
729
- issue_lines.append(
730
- f"\n⚠️ BRICK-WALL FILTER DETECTED at {spec['brick_wall_freq']:.0f} Hz"
731
- )
732
 
733
- # ==================================================================
734
- # FINAL OUTPUT
735
- # ==================================================================
736
-
737
- issues_text = "\n".join(issue_lines)
738
-
739
- ax_issues.text(
740
- 0.05, 0.95,
741
- issues_text,
742
- transform=ax_issues.transAxes,
743
- fontsize=11,
744
- verticalalignment="top",
745
- family="monospace",
746
- bbox=dict(
747
- boxstyle="round,pad=1",
748
- facecolor="#FFE6E6",
749
- edgecolor="#DC143C",
750
- linewidth=2
751
- )
752
- )
753
 
754
- # ============================
755
- # QUALITY SCORE PANEL (UPDATED)
756
- # ============================
757
-
758
- ax_score = fig.add_subplot(gs[3, 3])
759
- ax_score.axis("off")
760
-
761
- issues = audio_data["issues"]
762
-
763
- # Separate counts by severity
764
- critical = sum(1 for _, sev, _ in issues if sev == "CRITICAL")
765
- high = sum(1 for _, sev, _ in issues if sev == "HIGH")
766
- medium = sum(1 for _, sev, _ in issues if sev == "MEDIUM")
767
- low = sum(1 for _, sev, _ in issues if sev == "LOW")
768
-
769
- # --------------------------------------------
770
- # NEW: Weighted scoring model
771
- # --------------------------------------------
772
- score = 100
773
-
774
- score -= critical * 35 # Hard-damage issues
775
- score -= high * 20 # Major processing
776
- score -= medium * 8 # Subtle but relevant
777
- score -= low * 3 # Minor processing
778
-
779
- # Additional penalties for heavy processing
780
- if len(issues) >= 6:
781
- score -= 10
782
-
783
- if (critical + high) >= 3:
784
- score -= 10
785
-
786
- # Bonus for clean files
787
- if len(issues) == 0:
788
- score += 5
789
-
790
- score = max(0, min(score, 100))
791
-
792
- # --------------------------------------------
793
- # GRADE + COLOR MAPPING
794
- # --------------------------------------------
795
- if score >= 90:
796
- grade, quality, color = "A", "EXCELLENT", "#00C853"
797
- recommendation = "Excellent for TTS dataset"
798
- elif score >= 75:
799
- grade, quality, color = "B", "GOOD", "#64DD17"
800
- recommendation = "Very good quality; suitable for TTS"
801
- elif score >= 60:
802
- grade, quality, color = "C", "FAIR", "#FFD600"
803
- recommendation = "Usable but may contain processing artifacts"
804
- elif score >= 40:
805
- grade, quality, color = "D", "POOR", "#FF6D00"
806
- recommendation = "Not recommended for TTS (heavy processing)"
807
- else:
808
- grade, quality, color = "F", "CRITICAL", "#D50000"
809
- recommendation = "Severely degraded or processed; avoid for TTS"
810
-
811
- # --------------------------------------------
812
- # NEW: CLEANLINESS & PROCESSING INDEX
813
- # --------------------------------------------
814
- cleanliness_score = max(0, 100 - (medium * 5 + low * 3))
815
- processing_severity = (critical * 3) + (high * 2) + medium
816
-
817
- score_lines = [
818
- "QUALITY ASSESSMENT",
819
- "═" * 28,
820
- "",
821
- f"SCORE: {score}/100",
822
- f"GRADE: {grade}",
823
- f"QUALITY: {quality}",
824
- "",
825
- "RECOMMENDATION:",
826
- f"{recommendation}",
827
- "",
828
- "CLEANLINESS SCORE:",
829
- f"{cleanliness_score}/100",
830
  "",
831
- "PROCESSING SEVERITY INDEX:",
832
- f"{processing_severity}",
 
833
  "",
834
- "ISSUES SUMMARY",
835
- "─" * 28,
836
- f"🔴 Critical: {critical}",
837
- f"🟠 High: {high}",
838
- f"🟡 Medium: {medium}",
839
- f"🟢 Low: {low}",
840
- "",
841
- "─" * 28,
842
- "Generated:",
843
- f"{audio_data['timestamp']}"
844
  ]
845
 
846
- score_text = "\n".join(score_lines)
847
-
848
- ax_score.text(
849
- 0.5, 0.5, score_text,
850
- transform=ax_score.transAxes,
851
- fontsize=11,
852
- ha="center",
853
- va="center",
854
- family="monospace",
855
- bbox=dict(
856
- boxstyle="round,pad=1.2",
857
- facecolor=color,
858
- edgecolor="black",
859
- linewidth=3,
860
- alpha=0.75
861
- ),
862
- fontweight="bold"
863
- )
864
 
865
- # SAVE REPORT
866
- plt.savefig(
867
- output_path,
868
- dpi=300,
869
- bbox_inches="tight",
870
- facecolor="white",
871
- edgecolor="none"
872
- )
873
  plt.close()
 
874
 
875
- return output_path
876
 
877
  # ============================================================
878
- # MAIN ANALYSIS FUNCTION (GRADIO CALLBACK)
879
  # ============================================================
880
 
881
- def analyze_audio(audio_file, progress=gr.Progress()):
882
- """Analyze uploaded audio file."""
883
- if audio_file is None:
884
- return None, "⚠️ Please upload an audio file to analyze."
885
 
886
  try:
887
- progress(0.1, desc="Reading audio file...")
 
888
 
889
- output_dir = Path("reports")
890
- output_dir.mkdir(exist_ok=True)
891
 
892
- path = Path(audio_file)
 
893
 
894
- progress(0.2, desc="Loading audio data...")
895
- info = read_audio_info(str(path))
896
- y, sr = librosa.load(str(path), sr=None, mono=True)
897
 
898
- progress(0.4, desc="Analyzing time-domain...")
899
- time_stats = compute_time_domain_stats(y)
900
-
901
- progress(0.6, desc="Performing spectral analysis...")
902
- spectral = compute_spectral_analysis(y, sr)
903
-
904
- progress(0.7, desc="Computing loudness...")
905
  lufs = compute_loudness(y, sr) if LOUDNESS_AVAILABLE else None
906
 
907
- progress(0.8, desc="Detecting audio issues...")
908
- issues = detect_audio_issues(spectral, time_stats)
 
 
 
909
 
910
- audio_data = {
911
- "filename": path.name,
912
  "info": info,
913
- "time_stats": time_stats,
914
- "spectral": spectral,
915
  "lufs": lufs,
916
  "issues": issues,
 
 
917
  "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
918
  }
919
 
920
- progress(0.9, desc="Generating report...")
921
-
922
- output_filename = path.stem + "_report.png"
923
- output_path = output_dir / output_filename
924
-
925
- create_report(audio_data, str(output_path))
926
-
927
- progress(1.0, desc="Complete!")
928
 
929
- # ============================
930
- # SCORE COMPUTATION
931
- # ============================
932
 
933
- critical = sum(1 for _, sev, _ in issues if sev == "CRITICAL")
934
- high = sum(1 for _, sev, _ in issues if sev == "HIGH")
935
- medium = sum(1 for _, sev, _ in issues if sev == "MEDIUM")
936
-
937
- score = 100 - (critical * 30) - (high * 15) - (medium * 5)
938
- score = max(0, score)
939
-
940
- if score >= 90:
941
- grade, quality, color = "A", "EXCELLENT", "🟢"
942
- elif score >= 75:
943
- grade, quality, color = "B", "GOOD", "🟢"
944
- elif score >= 60:
945
- grade, quality, color = "C", "FAIR", "🟡"
946
- elif score >= 40:
947
- grade, quality, color = "D", "POOR", "🟠"
948
- else:
949
- grade, quality, color = "F", "CRITICAL", "🔴"
950
-
951
- energy = spectral["energy_distribution"]
952
-
953
- # ============================
954
- # SUMMARY OUTPUT (Markdown)
955
- # ============================
956
 
957
  summary = f"""
958
- # 🎵 Analysis Complete!
959
- ## File Information
960
- - **Filename:** `{audio_data['filename']}`
961
- - **Duration:** {info['duration']:.2f} sec
962
- - **Sample Rate:** {info['samplerate']:,} Hz
963
- - **Channels:** {info['channels']}
964
- - **Format:** {info['format']} ({info['subtype']})
965
 
966
- ---
 
 
967
 
968
- ## Quality Assessment
969
- ### Overall Score: **{score}/100** — Grade **{grade}** {color}
970
- **Quality Rating:** {quality}
971
-
972
- ### Audio Metrics
973
- | Metric | Value |
974
- |--------|--------|
975
- | Peak Level | {time_stats['peak_db']:.2f} dBFS |
976
- | RMS Level | {time_stats['rms_db']:.2f} dBFS |
977
- | Crest Factor | {time_stats['crest_factor_db']:.2f} dB |
978
- | SNR (Est.) | {time_stats['snr_db']:.1f} dB |
979
- """
980
-
981
- if lufs is not None:
982
- summary += f"| Integrated LUFS | {lufs:.2f} LUFS |\n"
983
-
984
- summary += f"""
985
  ---
986
 
987
- ## Spectral Analysis
988
- | Parameter | Value |
989
- |-----------|--------|
990
- | Spectral Centroid | {spectral['spectral_centroid']:.1f} Hz |
991
- | 85% Rolloff | {spectral['rolloff_85pct']:.1f} Hz |
992
- | 95% Rolloff | {spectral['rolloff_95pct']:.1f} Hz |
993
- | Highest Freq (–60 dB) | {spectral['highest_freq_minus60db']:.1f} Hz |
994
 
995
- ### Energy Distribution (Speech Bands)
996
 
997
- - **<100 Hz:** {energy['below_100hz']:.2f}%
998
- - **100–500 Hz:** {energy['100_500hz']:.2f}%
999
- - **500–2k Hz:** {energy['500_2khz']:.2f}%
1000
- - **2k–8k Hz:** {energy['2k_8khz']:.2f}%
1001
- - **8k–12k Hz:** {energy['8k_12khz']:.2f}%
1002
- - **12k–16k Hz:** {energy['12k_16khz']:.2f}%
1003
- - **>16k Hz:** {energy['above_16khz']:.2f}%
1004
 
1005
  ---
1006
 
1007
- ## Issues Detected: **{len(issues)}**
1008
  """
1009
 
1010
- if issues:
1011
- summary += "\n### ⚠️ Detected Issues:\n\n"
1012
- icons = {"CRITICAL": "🔴", "HIGH": "🟠", "MEDIUM": "🟡", "LOW": "🟢"}
1013
-
1014
- for issue_type, sev, desc in issues:
1015
- summary += f"{icons.get(sev,'⚪')} **[{sev}] {issue_type}**\n"
1016
- summary += f" - {desc}\n\n"
1017
- else:
1018
- summary += "\n### ✅ No significant issues detected.\n"
1019
-
1020
- if spectral["spectral_notches"]:
1021
- summary += f"\n### 🎵 Spectral Notches: {len(spectral['spectral_notches'])}\n"
1022
- for i, n in enumerate(spectral["spectral_notches"][:5], 1):
1023
- summary += f"{i}. **{n['freq']:.1f} Hz** (Depth: {n['depth_db']:.1f} dB)\n"
1024
-
1025
- summary += f"""
1026
 
1027
- ---
1028
-
1029
- 📊 **Report File:** `{output_filename}`
1030
- 🕒 **Generated:** {audio_data['timestamp']}
1031
-
1032
- """
1033
 
1034
- return str(output_path), summary
1035
 
1036
  except Exception as e:
1037
  import traceback
1038
  traceback.print_exc()
1039
- return None, f"# ❌ Analysis Failed\n\n**Error:** {str(e)}"
 
 
1040
  # ============================================================
1041
- # ============== GRADIO USER INTERFACE =====================
1042
  # ============================================================
1043
 
1044
  with gr.Blocks(title="Audio Forensic Analyzer") as demo:
1045
-
1046
  gr.Markdown("""
1047
- # 🎵 Audio Forensic Analyzer
1048
- Upload an audio file to perform detailed forensic-level analysis.
1049
-
1050
- This tool evaluates:
1051
- - Spectrum balance
1052
- - HF rolloff & filtering
1053
- - Compression
1054
- - Clipping
1055
- - Noise levels
1056
- - Spectral anomalies (notches, brickwalls)
1057
-
1058
- **Supported formats:** WAV, MP3, FLAC, OGG, M4A, AAC
1059
  """)
1060
 
1061
  with gr.Row():
1062
  with gr.Column(scale=1):
1063
- audio_input = gr.Audio(
1064
- label="📁 Upload Audio File",
1065
- type="filepath",
1066
- sources=["upload"]
1067
- )
1068
-
1069
- analyze_btn = gr.Button(
1070
- "🔍 Analyze Audio",
1071
- variant="primary",
1072
- size="lg"
1073
- )
1074
-
1075
  with gr.Column(scale=2):
1076
- report_output = gr.Image(
1077
- label="📊 Analysis Report",
1078
- type="filepath",
1079
- height=600
1080
- )
1081
-
1082
- with gr.Row():
1083
- summary_output = gr.Markdown(label="📋 Analysis Summary")
1084
 
1085
- analyze_btn.click(
1086
- fn=analyze_audio,
1087
- inputs=[audio_input],
1088
- outputs=[report_output, summary_output]
1089
- )
1090
 
 
1091
 
1092
- # ============================================================
1093
- # ============== APP LAUNCH ================================
1094
- # ============================================================
1095
 
1096
  if __name__ == "__main__":
1097
  demo.launch()
 
1
  # ============================================================
2
+ # AUDIO FORENSIC ANALYZER — FINAL VERSION WITH SYNTHETIC DETECTOR
3
  # ============================================================
4
 
5
  import gradio as gr
 
24
  LOUDNESS_AVAILABLE = False
25
 
26
 
27
+ # ============================================================
28
+ # READ AUDIO INFO
29
+ # ============================================================
30
 
31
  def read_audio_info(path):
 
32
  info = sf.info(path)
33
  return {
34
  "samplerate": int(info.samplerate),
 
40
  }
41
 
42
 
43
+ # ============================================================
44
+ # TIME-DOMAIN STATS
45
+ # ============================================================
46
+
47
  def compute_time_domain_stats(y):
 
48
  peak = float(np.max(np.abs(y)))
49
  rms = float(np.sqrt(np.mean(y ** 2)))
 
50
  peak_db = 20 * np.log10(max(peak, 1e-12))
51
  rms_db = 20 * np.log10(max(rms, 1e-12))
52
  crest_factor = peak_db - rms_db
 
53
  abs_y = np.abs(y)
54
  noise_floor = float(np.percentile(abs_y, 10))
55
  snr_est = 20 * np.log10(max(rms, 1e-12) / max(noise_floor, 1e-12))
 
68
 
69
 
70
  # ============================================================
71
+ # SPECTRAL ANALYSIS
72
  # ============================================================
73
 
74
  def compute_spectral_analysis(y, sr, n_fft=4096):
75
+ hop = n_fft // 4
76
+ S = np.abs(librosa.stft(y, n_fft=n_fft, hop_length=hop, window="hann"))
 
 
 
 
77
  freqs = np.linspace(0, sr / 2, S.shape[0])
 
 
78
  S_db = librosa.amplitude_to_db(S, ref=np.max)
79
 
 
80
  S_power = S ** 2
81
  energy = np.percentile(S_power, 90, axis=1) + 1e-20
82
  total_energy = float(np.sum(energy))
83
  cum_energy = np.cumsum(energy)
84
 
85
+ idx85 = np.searchsorted(cum_energy, 0.85 * total_energy)
86
+ idx95 = np.searchsorted(cum_energy, 0.95 * total_energy)
 
 
 
 
87
 
88
+ freq85 = float(freqs[min(idx85, len(freqs)-1)])
89
+ freq95 = float(freqs[min(idx95, len(freqs)-1)])
90
 
91
+ mean_db = np.percentile(S_db, 90, axis=1)
92
+ pk = float(np.max(S_db))
93
+ thr = pk - 60
94
+ bins = np.where(mean_db > thr)[0]
95
+ highest_freq = float(freqs[bins[-1]]) if len(bins) else 0.0
96
 
97
+ def band(low, high):
 
 
 
 
98
  i1 = np.searchsorted(freqs, low)
99
  i2 = np.searchsorted(freqs, high)
100
  return float(100 * np.sum(energy[i1:i2]) / total_energy)
101
 
102
+ def band_above(f):
103
  idx = np.searchsorted(freqs, f)
104
  return float(100 * np.sum(energy[idx:]) / total_energy)
105
 
106
  energy_stats = {
107
+ "below_100hz": band(0, 100),
108
+ "100_500hz": band(100, 500),
109
+ "500_2khz": band(500, 2000),
110
+ "2k_8khz": band(2000, 8000),
111
+ "8k_12khz": band(8000, 12000),
112
+ "12k_16khz": band(12000, 16000),
113
+ "above_16khz": band_above(16000)
114
  }
115
 
116
+ diffs = np.diff(mean_db)
117
+ bw_idx = np.where(diffs < -20)[0]
118
+ brick = bool(len(bw_idx))
119
+ brick_freq = float(freqs[bw_idx[0]]) if len(bw_idx) else None
 
120
 
121
+ smooth = sps.medfilt(mean_db, kernel_size=9)
 
122
  minima = sps.argrelextrema(smooth, np.less)[0]
123
  notches = []
 
124
  for m in minima:
125
  left = smooth[max(0, m - 6):m]
126
+ right = smooth[m+1:min(len(smooth), m+7)]
127
+ neigh = max(left.max() if len(left) else -999,
128
+ right.max() if len(right) else -999)
129
+ depth = neigh - smooth[m]
 
 
130
  if depth >= 15 and freqs[m] > 100:
131
+ notches.append({"freq": float(freqs[m]), "depth_db": float(depth)})
 
 
 
132
 
 
133
  centroid = float(np.mean(librosa.feature.spectral_centroid(S=S, sr=sr)))
134
  bandwidth = float(np.mean(librosa.feature.spectral_bandwidth(S=S, sr=sr)))
135
  flatness = float(np.mean(librosa.feature.spectral_flatness(S=S)))
 
138
  return {
139
  "S_db": S_db,
140
  "freqs": freqs,
141
+ "hop_length": hop,
142
+ "rolloff_85pct": freq85,
143
+ "rolloff_95pct": freq95,
 
144
  "highest_freq_minus60db": highest_freq,
145
  "energy_distribution": energy_stats,
146
+ "brick_wall_detected": brick,
147
  "brick_wall_freq": brick_freq,
148
  "spectral_notches": notches,
149
  "spectral_centroid": centroid,
 
151
  "spectral_flatness": flatness,
152
  "spectral_rolloff": rolloff
153
  }
154
+
155
+
156
+ # ============================================================
157
+ # SYNTHETIC VOICE DETECTOR (LIGHTWEIGHT)
158
+ # ============================================================
159
+
160
def detect_synthetic_voice(y, sr, spectral):
    """Estimate whether a recording sounds synthetic (informational heuristic).

    Combines four hand-tuned cues into a logistic score: similarity of
    consecutive MFCC frames, average MFCC variability, pitch jitter from a
    YIN f0 track, and the difference between the 8-12 kHz and 12-16 kHz
    energy shares.

    Args:
        y: Mono audio samples passed to librosa.
        sr: Sample rate of ``y`` in Hz.
        spectral: Dict containing an ``"energy_distribution"`` mapping with
            the keys ``"8k_12khz"`` and ``"12k_16khz"``.

    Returns:
        ``(prob, label)`` where ``prob`` is a float in [0, 1] and ``label``
        is ``"AI"`` when ``prob > 0.5`` and ``"Human"`` otherwise. When any
        step fails, or the score is not finite, ``(0.0, "Human")`` is
        returned so the rest of the report can still be produced.
    """
    try:
        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40)
        mfcc_std = np.mean(np.std(mfcc, axis=1))

        # fmin/fmax are keyword-only in recent librosa releases; passing them
        # positionally raises TypeError, which used to be silently swallowed.
        f0 = librosa.yin(y, fmin=50, fmax=400, sr=sr)
        jitter = np.std(np.diff(f0) / (np.mean(f0) + 1e-6))

        energy = spectral["energy_distribution"]
        # NOTE(review): compute_spectral_analysis reports these bands as
        # percentages (0-100), while the 0.10 offset below looks like it
        # expects a fraction - confirm the intended scale.
        sym = abs(energy["8k_12khz"] - energy["12k_16khz"])

        # Cosine similarity between each MFCC frame and its successor,
        # computed for all frame pairs at once.
        prev_frames = mfcc[:, :-1]
        next_frames = mfcc[:, 1:]
        dots = np.sum(prev_frames * next_frames, axis=0)
        norms = (np.linalg.norm(prev_frames, axis=0)
                 * np.linalg.norm(next_frames, axis=0))
        cos_sim = float(np.mean(dots / (norms + 1e-8)))

        score = (
            1.2 * (cos_sim - 0.85) +
            0.8 * (0.15 - mfcc_std) +
            1.0 * (0.02 - jitter) +
            0.5 * (0.10 - sym)
        )
        prob = 1 / (1 + np.exp(-5 * score))
        prob = float(np.clip(prob, 0, 1))

        # A clip with fewer than two frames yields NaN; report the neutral
        # fallback instead of a NaN probability.
        if not np.isfinite(prob):
            return 0.0, "Human"

        label = "AI" if prob > 0.5 else "Human"
        return prob, label
    except Exception:
        # Best-effort detector: never break the report, but leave a trace so
        # failures are visible in the logs.
        import traceback
        traceback.print_exc()
        return 0.0, "Human"
190
+
191
 
192
  # ============================================================
193
+ # ISSUE DETECTION (original heuristics preserved)
 
 
194
  # ============================================================
195
 
196
def detect_audio_issues(spectral, time_stats):
    """Turn spectral and time-domain measurements into a list of findings.

    Args:
        spectral: Dict with the keys ``"energy_distribution"`` (must contain
            ``"8k_12khz"``), ``"spectral_flatness"``, ``"spectral_notches"``,
            ``"highest_freq_minus60db"``, ``"brick_wall_detected"`` and,
            when a brick wall is flagged, ``"brick_wall_freq"``.
        time_stats: Dict with the keys ``"crest_factor_db"`` and ``"peak"``.

    Returns:
        A list of ``(issue_type, severity, description)`` tuples in check
        order; severity is one of ``"CRITICAL"``, ``"HIGH"``, ``"MEDIUM"``
        or ``"LOW"``. The list is empty when no rule fires.
    """
    issues = []
    energy = spectral["energy_distribution"]
    flatness = spectral["spectral_flatness"]
    notches = spectral["spectral_notches"]

    # High-frequency loss: judged from the 8-12 kHz energy share and the
    # highest frequency still within 60 dB of the spectral peak.
    hf_8_12 = energy["8k_12khz"]
    highest_freq = spectral["highest_freq_minus60db"]
    if hf_8_12 < 0.01 and highest_freq < 9000:
        issues.append(("HF_LOSS", "HIGH", "Severe HF cutoff"))
    elif hf_8_12 < 0.02:
        issues.append(("HF_LOSS", "LOW", "Low HF energy"))

    if spectral["brick_wall_detected"]:
        brick_freq = spectral.get("brick_wall_freq")
        # Guard against a flagged brick wall without a frequency; formatting
        # None with ':.0f' would raise.
        if brick_freq is None:
            description = "Brick-wall filter detected"
        else:
            description = f"Brick-wall at {brick_freq:.0f} Hz"
        issues.append(("BRICK_WALL", "HIGH", description))

    # Noise-reduction artefacts: high flatness, worse when notches are present.
    if flatness > 0.40 and len(notches) >= 3:
        issues.append(("NOISE_REDUCTION_ARTIFACTS", "HIGH", "NR artifacts"))
    elif flatness > 0.35:
        issues.append(("NR_SOFT", "LOW", "Mild noise reduction"))

    if len(notches) > 0:
        issues.append(("SPECTRAL_NOTCHES", "MEDIUM",
                       f"{len(notches)} notches detected"))

    # Dynamic-range compression, judged from the crest factor.
    crest = time_stats["crest_factor_db"]
    if crest < 3:
        issues.append(("OVER_COMPRESSION", "HIGH", f"Crest {crest:.1f} dB"))
    elif crest < 6:
        issues.append(("COMPRESSION", "MEDIUM", f"Crest {crest:.1f} dB"))

    if time_stats["peak"] >= 0.999:
        issues.append(("CLIPPING", "CRITICAL", "Probable clipping"))

    return issues
236
 
237
+
238
  # ============================================================
239
+ # REPORT GENERATION (PNG)
240
  # ============================================================
241
 
242
def create_report(data, outpath):
    """Render the analysis results as a multi-panel PNG report.

    The figure has four rows: the spectrogram, file/time-domain and spectral
    statistics side by side, the list of detected issues, and a quality
    score together with the synthetic-voice verdict.

    Args:
        data: Dict with the keys ``"filename"``, ``"info"``, ``"time_stats"``,
            ``"spectral"``, ``"lufs"``, ``"issues"``, ``"synthetic_prob"``,
            ``"synthetic_label"`` and ``"timestamp"``.
        outpath: Destination path of the PNG file.

    Returns:
        ``outpath``, unchanged.
    """
    plt.style.use("default")
    fig = plt.figure(figsize=(22, 16))

    # Close the figure even when a plotting step fails, so repeated analyses
    # in a long-running app do not accumulate open figures.
    try:
        fig.patch.set_facecolor("white")
        fig.suptitle(
            f"AUDIO FORENSIC ANALYSIS REPORT\n{data['filename']}",
            fontsize=20, fontweight="bold", y=0.97
        )

        gs = gridspec.GridSpec(
            4, 4, figure=fig,
            hspace=0.5, wspace=0.4,
            height_ratios=[1.6, 1, 1, 1]
        )

        info = data["info"]
        t = data["time_stats"]
        sp = data["spectral"]
        ed = sp["energy_distribution"]
        issues = data["issues"]

        # --- Row 0: spectrogram -------------------------------------------
        ax = fig.add_subplot(gs[0, :])
        img = librosa.display.specshow(
            sp["S_db"], sr=info["samplerate"], hop_length=sp["hop_length"],
            x_axis="time", y_axis="hz",
            cmap="viridis", ax=ax, vmin=-80, vmax=0
        )
        ax.set_title("Spectrogram", fontsize=14)
        fig.colorbar(img, ax=ax)

        # --- Row 1, left: file information and time-domain statistics ------
        ax2 = fig.add_subplot(gs[1, 0:2])
        ax2.axis("off")

        block = [
            "FILE INFORMATION",
            f"Sample Rate: {info['samplerate']}",
            f"Channels: {info['channels']}",
            f"Duration: {info['duration']:.2f} sec",
            "",
            "TIME-DOMAIN",
            f"Peak: {t['peak_db']:.2f} dBFS",
            f"RMS: {t['rms_db']:.2f} dBFS",
            f"Crest: {t['crest_factor_db']:.2f} dB",
            f"SNR: {t['snr_db']:.1f} dB",
            f"Zero-Cross: {t['zero_crossing_rate']:.4f}",
        ]
        # Loudness is None when it was not measured.
        if data["lufs"] is not None:
            block.append(f"Integrated LUFS: {data['lufs']:.2f}")

        ax2.text(0.02, 0.98, "\n".join(block), va="top",
                 fontsize=11, family="monospace",
                 bbox=dict(boxstyle="round", fc="#E8F4F8", ec="#0077BE"))

        # --- Row 1, right: spectral statistics -----------------------------
        ax3 = fig.add_subplot(gs[1, 2:4])
        ax3.axis("off")

        block2 = [
            "SPECTRAL ANALYSIS",
            f"Centroid: {sp['spectral_centroid']:.1f}",
            f"Bandwidth: {sp['spectral_bandwidth']:.1f}",
            f"Flatness: {sp['spectral_flatness']:.4f}",
            f"Rolloff 85%: {sp['rolloff_85pct']:.1f}",
            f"Rolloff 95%: {sp['rolloff_95pct']:.1f}",
            f"Highest -60dB: {sp['highest_freq_minus60db']:.1f}",
            "",
            "ENERGY DISTRIBUTION",
            *(f"{k}: {v:.2f}%" for k, v in ed.items())
        ]

        ax3.text(0.02, 0.98, "\n".join(block2), va="top",
                 fontsize=11, family="monospace",
                 bbox=dict(boxstyle="round", fc="#FFF4E6", ec="#FF8C00"))

        # --- Row 2: detected issues ----------------------------------------
        ax4 = fig.add_subplot(gs[2, :])
        ax4.axis("off")

        lines = ["DETECTED ISSUES", ""]
        if not issues:
            lines.append("No major issues detected.")
        else:
            for typ, sev, desc in issues:
                lines.append(f"[{sev}] {typ} → {desc}")

        if sp["spectral_notches"]:
            lines.append("")
            lines.append(f"Spectral Notches: {len(sp['spectral_notches'])}")

        ax4.text(0.02, 0.98, "\n".join(lines), fontsize=11,
                 va="top", family="monospace",
                 bbox=dict(boxstyle="round", fc="#FFE6E6", ec="#DC143C"))

        # --- Row 3: quality score and synthetic-voice verdict --------------
        ax5 = fig.add_subplot(gs[3, :])
        ax5.axis("off")

        crit = sum(1 for _, s, _ in issues if s == "CRITICAL")
        hi = sum(1 for _, s, _ in issues if s == "HIGH")
        med = sum(1 for _, s, _ in issues if s == "MEDIUM")
        low = sum(1 for _, s, _ in issues if s == "LOW")

        # Start from 100 and subtract a fixed penalty per issue by severity;
        # the result is clamped to the 0-100 range.
        score = 100 - (crit * 35 + hi * 20 + med * 8 + low * 3)
        score = np.clip(score, 0, 100)

        prob = data["synthetic_prob"]
        label = data["synthetic_label"]

        block3 = [
            "QUALITY & SYNTHETIC ANALYSIS",
            f"Score: {score:.1f}/100",
            f"Issues → C:{crit}, H:{hi}, M:{med}, L:{low}",
            "",
            "SYNTHETIC DETECTOR",
            f"Probability: {prob:.2f}",
            f"Label: {label}",
            "",
            f"Generated: {data['timestamp']}"
        ]

        ax5.text(0.5, 0.5, "\n".join(block3),
                 fontsize=11, ha="center", va="center",
                 family="monospace",
                 bbox=dict(boxstyle="round", fc="#DFFFD8", ec="black"))

        # Save through the figure object rather than pyplot's implicit
        # "current figure", which another caller may have changed.
        fig.savefig(outpath, dpi=300, bbox_inches="tight")
    finally:
        plt.close(fig)

    return outpath
379
 
 
380
 
381
  # ============================================================
382
+ # MAIN ANALYSIS FUNCTION
383
  # ============================================================
384
 
385
def analyze_audio(file, progress=gr.Progress()):
    """Run the full analysis pipeline on an uploaded audio file.

    Loads the audio, computes time-domain and spectral statistics, optional
    loudness, the issue list and the synthetic-voice estimate, renders the
    PNG report into ``reports/`` and builds a Markdown summary.

    Args:
        file: Path of the uploaded audio file, or ``None`` when nothing was
            uploaded.
        progress: Gradio progress callback, called with fractions 0.1-1.0.

    Returns:
        ``(report_path, summary_markdown)`` on success, or
        ``(None, message)`` when no file was given or the analysis failed.
    """
    if file is None:
        return None, "Please upload an audio file."

    try:
        progress(0.1)
        source = Path(file)
        source_path = str(source)

        info = read_audio_info(source_path)
        # sr=None keeps the file's native sample rate; channels are mixed down.
        y, sr = librosa.load(source_path, sr=None, mono=True)

        progress(0.3)
        time_stats = compute_time_domain_stats(y)

        progress(0.5)
        spectral = compute_spectral_analysis(y, sr)

        progress(0.6)
        if LOUDNESS_AVAILABLE:
            lufs = compute_loudness(y, sr)
        else:
            lufs = None

        progress(0.7)
        issues = detect_audio_issues(spectral, time_stats)

        progress(0.75)
        prob, label = detect_synthetic_voice(y, sr, spectral)

        report_data = {
            "filename": source.name,
            "info": info,
            "time_stats": time_stats,
            "spectral": spectral,
            "lufs": lufs,
            "issues": issues,
            "synthetic_prob": prob,
            "synthetic_label": label,
            "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        }

        report_dir = Path("reports")
        report_dir.mkdir(exist_ok=True)
        report_png = report_dir / f"{source.stem}_report.png"

        progress(0.9)
        create_report(report_data, str(report_png))

        progress(1.0)

        header = f"""
# 🎧 Audio Forensic Analyzer
## File: `{source.name}`

### **Synthetic Detector**
- Probability: **{prob:.2f}**
- Label: **{label}**

---

### **Quality Metrics**
- Peak: {time_stats['peak_db']:.2f} dBFS
- RMS: {time_stats['rms_db']:.2f} dBFS
- Crest Factor: {time_stats['crest_factor_db']:.2f} dB
- SNR: {time_stats['snr_db']:.1f} dB

---

### **Spectral**
- Centroid: {spectral['spectral_centroid']:.1f} Hz
- Rolloff 85%: {spectral['rolloff_85pct']:.1f} Hz
- Highest -60 dB: {spectral['highest_freq_minus60db']:.1f} Hz

---

### **Issues Detected:** {len(issues)}
"""

        # One Markdown bullet per finding, in detection order.
        issue_lines = "".join(
            f"- **[{sev}] {typ}** → {desc}\n" for typ, sev, desc in issues
        )
        footer = f"\n---\n📊 **Report saved as:** `{report_png.name}`"

        return str(report_png), header + issue_lines + footer

    except Exception as e:
        # UI boundary: show the error to the user and log the traceback
        # instead of letting the exception propagate.
        import traceback
        traceback.print_exc()
        return None, f"Error: {e}"
471
+
472
+
473
  # ============================================================
474
+ # UI
475
  # ============================================================
476
 
477
# Build the Gradio interface: upload + button on the left, rendered report
# on the right, Markdown summary underneath.
with gr.Blocks(title="Audio Forensic Analyzer") as demo:
    gr.Markdown("""
# 🔍 Audio Forensic Analyzer
Upload an audio file to generate a complete forensic report.
**Now includes a lightweight AI-vs-Human synthetic detector (informational only).**
""")

    with gr.Row():
        with gr.Column(scale=1):
            # type="filepath" hands analyze_audio a path on disk.
            inp = gr.Audio(label="Upload Audio", type="filepath")
            btn = gr.Button("Analyze", variant="primary")
        with gr.Column(scale=2):
            img = gr.Image(label="Report", type="filepath", height=600)

    summary = gr.Markdown()

    # analyze_audio returns (report image path, Markdown summary), matching
    # the order of the two output components.
    btn.click(fn=analyze_audio, inputs=inp, outputs=[img, summary])


# Start the web server only when run as a script, not on import.
if __name__ == "__main__":
    demo.launch()