Mr7Explorer committed on
Commit
956259e
·
verified ·
1 Parent(s): ee0d393

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +473 -296
app.py CHANGED
@@ -1,3 +1,7 @@
 
 
 
 
1
  import gradio as gr
2
  import sys
3
  from pathlib import Path
@@ -23,6 +27,7 @@ except ImportError:
23
  # ==================== ANALYSIS FUNCTIONS ====================
24
 
25
  def read_audio_info(path):
 
26
  info = sf.info(path)
27
  return {
28
  "samplerate": int(info.samplerate),
@@ -35,18 +40,19 @@ def read_audio_info(path):
35
 
36
 
37
  def compute_time_domain_stats(y):
 
38
  peak = float(np.max(np.abs(y)))
39
- rms = float(np.sqrt(np.mean(y**2)))
40
-
41
  peak_db = 20 * np.log10(max(peak, 1e-12))
42
  rms_db = 20 * np.log10(max(rms, 1e-12))
43
  crest_factor = peak_db - rms_db
44
-
45
  abs_y = np.abs(y)
46
  noise_floor = float(np.percentile(abs_y, 10))
47
  snr_est = 20 * np.log10(max(rms, 1e-12) / max(noise_floor, 1e-12))
48
  zcr = float(np.mean(librosa.feature.zero_crossing_rate(y)))
49
-
50
  return {
51
  "peak": peak,
52
  "rms": rms,
@@ -59,76 +65,90 @@ def compute_time_domain_stats(y):
59
  }
60
 
61
 
62
- def compute_spectral_analysis(y, sr, n_fft=8192):
 
 
 
 
 
 
63
  hop_length = n_fft // 4
64
 
65
  # STFT
66
- S = np.abs(librosa.stft(y, n_fft=n_fft, hop_length=hop_length, window='hann'))
67
- freqs = np.linspace(0, sr/2, S.shape[0])
68
 
69
- # dB matrix
70
  S_db = librosa.amplitude_to_db(S, ref=np.max)
71
 
72
- # ===== HYBRID FIX: Percentile-Based Energy =====
73
- S_power = S**2
74
- energy = np.percentile(S_power, 75, axis=1) + 1e-20
75
  total_energy = float(np.sum(energy))
76
-
77
  cum_energy = np.cumsum(energy)
 
 
78
  roll85_idx = np.searchsorted(cum_energy, 0.85 * total_energy)
79
  roll95_idx = np.searchsorted(cum_energy, 0.95 * total_energy)
80
- freq_at_85 = float(freqs[min(roll85_idx, len(freqs)-1)])
81
- freq_at_95 = float(freqs[min(roll95_idx, len(freqs)-1)])
82
 
83
- # ===== HYBRID FIX: 90th percentile dB (instead of mean) =====
 
 
 
84
  mean_db_per_bin = np.percentile(S_db, 90, axis=1)
85
 
86
  peak_db = float(np.max(S_db))
87
- threshold_db = peak_db - 60.0
 
88
  non_silent_bins = np.where(mean_db_per_bin > threshold_db)[0]
89
  highest_freq = float(freqs[non_silent_bins[-1]]) if non_silent_bins.size else 0.0
90
 
91
- # Energy band functions
92
- def energy_above(f):
93
- idx = np.searchsorted(freqs, f)
94
- return float(100.0 * np.sum(energy[idx:]) / total_energy)
 
95
 
96
- def energy_below(f):
97
  idx = np.searchsorted(freqs, f)
98
- return float(100.0 * np.sum(energy[:idx]) / total_energy)
99
 
100
  energy_stats = {
101
- "below_100hz": energy_below(100),
102
- "below_200hz": energy_below(200),
103
- "100_500hz": energy_below(500) - energy_below(100),
104
- "500_2khz": energy_below(2000) - energy_below(500),
105
- "2k_8khz": energy_below(8000) - energy_below(2000),
106
- "above_8khz": energy_above(8000),
107
- "above_12khz": energy_above(12000),
108
- "above_16khz": energy_above(16000),
109
  }
110
 
111
- # Brick-wall detection using new percentile spectrum
112
  diffs = np.diff(mean_db_per_bin)
113
- big_drop_idx = np.where(diffs < -20.0)[0]
114
  brick_wall = bool(big_drop_idx.size)
115
  brick_freq = float(freqs[big_drop_idx[0]]) if big_drop_idx.size else None
116
 
117
- # Spectral notches (unchanged, but uses new mean_db_per_bin)
118
  smooth = sps.medfilt(mean_db_per_bin, kernel_size=9)
119
  minima = sps.argrelextrema(smooth, np.less)[0]
120
  notches = []
 
121
  for m in minima:
122
- left = smooth[max(0, m-6):m]
123
- right = smooth[m+1:min(len(smooth), m+7)]
124
- neighbors_peak = max(
125
  left.max() if left.size else -999,
126
  right.max() if right.size else -999
127
  )
128
- depth = neighbors_peak - smooth[m]
129
- if depth >= 15.0 and freqs[m] > 100:
130
- notches.append({"freq": float(freqs[m]), "depth_db": float(depth)})
131
-
 
 
 
 
132
  centroid = float(np.mean(librosa.feature.spectral_centroid(S=S, sr=sr)))
133
  bandwidth = float(np.mean(librosa.feature.spectral_bandwidth(S=S, sr=sr)))
134
  flatness = float(np.mean(librosa.feature.spectral_flatness(S=S)))
@@ -149,139 +169,182 @@ def compute_spectral_analysis(y, sr, n_fft=8192):
149
  "spectral_centroid": centroid,
150
  "spectral_bandwidth": bandwidth,
151
  "spectral_flatness": flatness,
152
- "spectral_rolloff": rolloff,
153
  }
154
-
155
-
156
- def compute_loudness(y, sr):
157
- if not LOUDNESS_AVAILABLE:
158
- return None
159
- try:
160
- meter = pyln.Meter(sr)
161
- loudness = float(meter.integrated_loudness(y))
162
- return loudness
163
- except Exception:
164
- return None
165
-
166
 
167
  def detect_audio_issues(spectral, time_stats):
 
168
  issues = []
169
  energy = spectral["energy_distribution"]
170
-
171
- if energy["below_200hz"] < 2.0:
172
- issues.append(("HIGH_PASS_FILTER", "HIGH",
173
- f"Very low energy below 200Hz ({energy['below_200hz']:.2f}%). Likely HPF applied."))
174
- elif energy["below_200hz"] < 5.0:
175
- issues.append(("HIGH_PASS_FILTER", "MEDIUM",
176
- f"Low energy below 200Hz ({energy['below_200hz']:.2f}%). Possible mild HPF."))
177
-
178
- if energy["above_12khz"] < 0.2 and spectral["highest_freq_minus60db"] < 12000:
179
- issues.append(("HF_LOSS", "HIGH",
180
- f"Severe HF loss. Only {energy['above_12khz']:.3f}% above 12kHz."))
181
- elif energy["above_12khz"] < 1.0:
182
- issues.append(("HF_LOSS", "MEDIUM",
183
- f"Reduced HF content ({energy['above_12khz']:.2f}% above 12kHz)."))
184
 
185
- if spectral["brick_wall_detected"]:
186
- issues.append(("BRICK_WALL", "HIGH",
187
- f"Brick-wall filter at {spectral['brick_wall_freq']:.0f}Hz."))
 
188
 
189
- if len(spectral["spectral_notches"]) > 0:
190
- issues.append(("SPECTRAL_NOTCHES", "MEDIUM",
191
- f"{len(spectral['spectral_notches'])} spectral notches detected."))
 
 
 
 
 
192
 
193
- if time_stats["crest_factor_db"] < 3.0:
194
- issues.append(("OVER_COMPRESSION", "HIGH",
195
- f"Very low crest factor ({time_stats['crest_factor_db']:.1f}dB). Heavy compression."))
196
- elif time_stats["crest_factor_db"] < 6.0:
197
- issues.append(("COMPRESSION", "MEDIUM",
198
- f"Low crest factor ({time_stats['crest_factor_db']:.1f}dB). Moderate compression."))
199
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
200
  if time_stats["peak"] >= 0.999:
201
- issues.append(("CLIPPING", "CRITICAL",
202
- f"Peak at {time_stats['peak']:.6f}. Possible digital clipping!"))
203
 
204
  return issues
205
 
 
 
 
 
 
206
  def create_report(audio_data, output_path):
207
  """Create comprehensive PNG report"""
208
-
209
- plt.style.use('default')
210
- fig = plt.figure(figsize=(22, 14))
211
- fig.patch.set_facecolor('white')
212
-
213
- fig.suptitle(f'AUDIO FORENSIC ANALYSIS REPORT\n{audio_data["filename"]}',
214
- fontsize=20, fontweight='bold', y=0.97)
215
-
216
- gs = gridspec.GridSpec(4, 4, figure=fig, hspace=0.4, wspace=0.4,
217
- height_ratios=[1.5, 1, 0.8, 0.9],
218
- left=0.05, right=0.95, top=0.92, bottom=0.05)
219
-
220
- # SPECTROGRAM
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
221
  ax_spec = fig.add_subplot(gs[0, :])
222
- S_db = audio_data['spectral']['S_db']
223
- sr = audio_data['info']['samplerate']
224
- hop = audio_data['spectral']['hop_length']
225
-
 
226
  img = librosa.display.specshow(
227
- S_db, sr=sr, hop_length=hop,
228
- x_axis='time', y_axis='hz',
229
- cmap='viridis', ax=ax_spec, vmin=-80, vmax=0
 
 
 
 
 
 
230
  )
231
- ax_spec.set_title('Spectrogram', fontsize=14, fontweight='bold', pad=10)
232
- ax_spec.set_ylabel('Frequency (Hz)', fontsize=11, fontweight='bold')
233
- ax_spec.set_xlabel('Time (seconds)', fontsize=11, fontweight='bold')
234
- ax_spec.grid(True, alpha=0.3, linestyle='--', linewidth=0.5)
235
-
236
- cbar = plt.colorbar(img, ax=ax_spec, format='%+2.0f dB', pad=0.01)
 
237
  cbar.ax.tick_params(labelsize=10)
238
- cbar.set_label('Magnitude (dB)', fontsize=10, fontweight='bold')
239
-
240
- # FILE INFO
 
 
 
241
  ax_info = fig.add_subplot(gs[1, 0:2])
242
- ax_info.axis('off')
243
- info = audio_data['info']
244
- time = audio_data['time_stats']
245
-
 
246
  info_lines = [
247
  "FILE INFORMATION",
248
  "─" * 50,
249
  f"Sample Rate: {info['samplerate']:,} Hz",
250
  f"Channels: {info['channels']}",
251
- f"Duration: {info['duration']:.2f} seconds",
252
  f"Format: {info['format']} ({info['subtype']})",
253
  f"Total Frames: {info['frames']:,}",
254
  "",
255
  "TIME-DOMAIN ANALYSIS",
256
  "─" * 50,
257
- f"Peak Level: {time['peak_db']:.2f} dBFS ({time['peak']:.6f})",
258
- f"RMS Level: {time['rms_db']:.2f} dBFS ({time['rms']:.6f})",
259
  f"Crest Factor: {time['crest_factor_db']:.2f} dB",
260
  f"Noise Floor: {time['noise_floor']:.6f}",
261
  f"Est. SNR: {time['snr_db']:.1f} dB",
262
  f"Zero Cross Rate: {time['zero_crossing_rate']:.4f}",
263
  ]
264
-
265
- if audio_data.get('lufs') is not None:
266
  info_lines.extend([
267
  "",
268
  "LOUDNESS (BS.1770)",
269
  "─" * 50,
270
  f"Integrated LUFS: {audio_data['lufs']:.2f} LUFS"
271
  ])
272
-
273
  info_text = "\n".join(info_lines)
274
- ax_info.text(0.05, 0.95, info_text, transform=ax_info.transAxes,
275
- fontsize=11, verticalalignment='top', family='monospace',
276
- bbox=dict(boxstyle='round,pad=1', facecolor='#E8F4F8',
277
- edgecolor='#0077BE', linewidth=2))
278
-
279
- # SPECTRAL STATS
 
 
 
 
 
 
 
 
 
 
 
 
280
  ax_spectral = fig.add_subplot(gs[1, 2:4])
281
- ax_spectral.axis('off')
282
- spec = audio_data['spectral']
283
- energy = spec['energy_distribution']
284
-
 
285
  spectral_lines = [
286
  "SPECTRAL ANALYSIS",
287
  "─" * 50,
@@ -290,68 +353,118 @@ def create_report(audio_data, output_path):
290
  f"Flatness: {spec['spectral_flatness']:.4f}",
291
  f"Rolloff: {spec['spectral_rolloff']:.1f} Hz",
292
  "",
293
- "FREQUENCY ROLLOFFS",
294
  "─" * 50,
295
  f"85% Energy: {spec['rolloff_85pct']:.1f} Hz",
296
  f"95% Energy: {spec['rolloff_95pct']:.1f} Hz",
297
  f"Highest (-60dB): {spec['highest_freq_minus60db']:.1f} Hz",
298
  "",
299
- "ENERGY DISTRIBUTION BY BAND",
300
  "─" * 50,
301
  f"< 100 Hz: {energy['below_100hz']:.2f}%",
302
- f"100-500 Hz: {energy['100_500hz']:.2f}%",
303
- f"500-2k Hz: {energy['500_2khz']:.2f}%",
304
- f"2k-8k Hz: {energy['2k_8khz']:.2f}%",
305
- f"> 8 kHz: {energy['above_8khz']:.2f}%",
306
- f"> 12 kHz: {energy['above_12khz']:.2f}%",
307
- f"> 16 kHz: {energy['above_16khz']:.2f}%",
308
  ]
309
-
310
  spectral_text = "\n".join(spectral_lines)
311
- ax_spectral.text(0.05, 0.95, spectral_text, transform=ax_spectral.transAxes,
312
- fontsize=11, verticalalignment='top', family='monospace',
313
- bbox=dict(boxstyle='round,pad=1', facecolor='#FFF4E6',
314
- edgecolor='#FF8C00', linewidth=2))
315
-
316
- # ENERGY BAR CHART
 
 
 
 
 
 
 
 
 
 
 
 
 
 
317
  ax_energy = fig.add_subplot(gs[2, :])
318
-
319
- bands = ['<100Hz', '100-500Hz', '500-2kHz', '2k-8kHz', '>8kHz', '>12kHz', '>16kHz']
 
 
 
 
 
 
 
 
 
320
  values = [
321
- energy['below_100hz'],
322
- energy['100_500hz'],
323
- energy['500_2khz'],
324
- energy['2k_8khz'],
325
- energy['above_8khz'],
326
- energy['above_12khz'],
327
- energy['above_16khz']
328
  ]
329
-
330
- colors = ['#2C3E50', '#E74C3C', '#E67E22', '#F39C12', '#2ECC71', '#3498DB', '#9B59B6']
331
- bars = ax_energy.bar(bands, values, color=colors, edgecolor='black', linewidth=1.5, alpha=0.85)
332
-
333
- ax_energy.set_ylabel('Energy Percentage (%)', fontsize=12, fontweight='bold')
334
- ax_energy.set_title('Frequency Band Energy Distribution', fontsize=13, fontweight='bold', pad=10)
335
- ax_energy.grid(axis='y', alpha=0.4, linestyle='--', linewidth=0.8)
336
- ax_energy.set_ylim(0, max(values) * 1.15)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
337
  ax_energy.set_axisbelow(True)
338
-
339
  for bar, val in zip(bars, values):
340
  height = bar.get_height()
341
- ax_energy.text(bar.get_x() + bar.get_width()/2., height + 0.5,
342
- f'{val:.2f}%', ha='center', va='bottom',
343
- fontsize=10, fontweight='bold')
344
-
 
 
 
 
 
 
 
345
  # ISSUES PANEL
 
 
346
  ax_issues = fig.add_subplot(gs[3, 0:3])
347
- ax_issues.axis('off')
348
-
349
- issues = audio_data['issues']
350
-
351
- issue_lines = ["DETECTED ISSUES & WARNINGS", "═" * 80]
352
-
 
 
 
353
  if not issues:
354
- issue_lines.append("βœ… No significant issues detected - Audio quality is good!")
355
  else:
356
  severity_icons = {
357
  "CRITICAL": "πŸ”΄ CRITICAL",
@@ -359,53 +472,73 @@ def create_report(audio_data, output_path):
359
  "MEDIUM": "🟑 MEDIUM",
360
  "LOW": "🟒 LOW"
361
  }
362
-
363
  for issue_type, severity, description in issues:
364
  icon = severity_icons.get(severity, "βšͺ INFO")
365
- issue_lines.append(f"\n{icon} - {issue_type}")
366
  issue_lines.append(f" β†’ {description}")
367
-
368
- if spec['spectral_notches']:
 
369
  issue_lines.append(f"\n🎡 SPECTRAL NOTCHES DETECTED: {len(spec['spectral_notches'])}")
370
- for i, notch in enumerate(spec['spectral_notches'][:5], 1):
371
- issue_lines.append(f" {i}. Frequency: {notch['freq']:.1f} Hz, Depth: {notch['depth_db']:.1f} dB")
372
- if len(spec['spectral_notches']) > 5:
373
- issue_lines.append(f" ... and {len(spec['spectral_notches'])-5} more")
374
-
375
- if spec['brick_wall_detected']:
 
 
 
376
  issue_lines.append(f"\n⚠️ BRICK-WALL FILTER: Detected at {spec['brick_wall_freq']:.0f} Hz")
377
-
378
  issues_text = "\n".join(issue_lines)
379
- ax_issues.text(0.05, 0.95, issues_text, transform=ax_issues.transAxes,
380
- fontsize=11, verticalalignment='top', family='monospace',
381
- bbox=dict(boxstyle='round,pad=1', facecolor='#FFE6E6',
382
- edgecolor='#DC143C', linewidth=2))
383
-
384
- # QUALITY SCORE
 
 
 
 
 
 
 
 
 
 
 
 
385
  ax_score = fig.add_subplot(gs[3, 3])
386
- ax_score.axis('off')
387
-
 
 
 
 
 
 
 
388
  score = 100
389
- critical = sum(1 for _, sev, _ in issues if sev == 'CRITICAL')
390
- high = sum(1 for _, sev, _ in issues if sev == 'HIGH')
391
- medium = sum(1 for _, sev, _ in issues if sev == 'MEDIUM')
392
-
393
  score -= critical * 30
394
  score -= high * 15
395
  score -= medium * 5
396
  score = max(0, score)
397
-
 
398
  if score >= 90:
399
- grade, color, quality = "A", '#00C853', "EXCELLENT"
400
  elif score >= 75:
401
- grade, color, quality = "B", '#64DD17', "GOOD"
402
  elif score >= 60:
403
- grade, color, quality = "C", '#FFD600', "FAIR"
404
  elif score >= 40:
405
- grade, color, quality = "D", '#FF6D00', "POOR"
406
  else:
407
- grade, color, quality = "F", '#D50000', "CRITICAL"
408
-
409
  score_lines = [
410
  "QUALITY ASSESSMENT",
411
  "═" * 28,
@@ -421,55 +554,76 @@ def create_report(audio_data, output_path):
421
  f"🟑 Medium: {medium}",
422
  "",
423
  "─" * 28,
424
- f"Generated:",
425
  f"{audio_data['timestamp']}"
426
  ]
427
-
428
  score_text = "\n".join(score_lines)
429
- ax_score.text(0.5, 0.5, score_text, transform=ax_score.transAxes,
430
- fontsize=11, ha='center', va='center', family='monospace',
431
- bbox=dict(boxstyle='round,pad=1.2', facecolor=color,
432
- edgecolor='black', linewidth=3, alpha=0.7),
433
- fontweight='bold')
434
-
435
- plt.savefig(output_path, dpi=300, bbox_inches='tight',
436
- facecolor='white', edgecolor='none')
437
- plt.close()
438
-
439
- return output_path
440
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
441
 
442
- # ==================== GRADIO INTERFACE ====================
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
443
 
444
  def analyze_audio(audio_file, progress=gr.Progress()):
445
- """Analyze uploaded audio file"""
446
  if audio_file is None:
447
  return None, "⚠️ Please upload an audio file to analyze."
448
-
449
  try:
450
  progress(0.1, desc="Reading audio file...")
451
-
452
  output_dir = Path("reports")
453
  output_dir.mkdir(exist_ok=True)
454
-
455
  path = Path(audio_file)
456
-
457
  progress(0.2, desc="Loading audio data...")
458
  info = read_audio_info(str(path))
459
  y, sr = librosa.load(str(path), sr=None, mono=True)
460
-
461
  progress(0.4, desc="Analyzing time-domain...")
462
  time_stats = compute_time_domain_stats(y)
463
-
464
  progress(0.6, desc="Performing spectral analysis...")
465
  spectral = compute_spectral_analysis(y, sr)
466
-
467
  progress(0.7, desc="Computing loudness...")
468
  lufs = compute_loudness(y, sr) if LOUDNESS_AVAILABLE else None
469
-
470
  progress(0.8, desc="Detecting audio issues...")
471
  issues = detect_audio_issues(spectral, time_stats)
472
-
473
  audio_data = {
474
  "filename": path.name,
475
  "info": info,
@@ -479,24 +633,27 @@ def analyze_audio(audio_file, progress=gr.Progress()):
479
  "issues": issues,
480
  "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
481
  }
482
-
483
  progress(0.9, desc="Generating report...")
484
-
485
  output_filename = path.stem + "_report.png"
486
  output_path = output_dir / output_filename
487
-
488
  create_report(audio_data, str(output_path))
489
-
490
  progress(1.0, desc="Complete!")
491
-
492
- # Calculate quality score
493
- critical = sum(1 for _, sev, _ in issues if sev == 'CRITICAL')
494
- high = sum(1 for _, sev, _ in issues if sev == 'HIGH')
495
- medium = sum(1 for _, sev, _ in issues if sev == 'MEDIUM')
496
-
 
 
 
497
  score = 100 - (critical * 30) - (high * 15) - (medium * 5)
498
  score = max(0, score)
499
-
500
  if score >= 90:
501
  grade, quality, color = "A", "EXCELLENT", "🟒"
502
  elif score >= 75:
@@ -507,102 +664,117 @@ def analyze_audio(audio_file, progress=gr.Progress()):
507
  grade, quality, color = "D", "POOR", "🟠"
508
  else:
509
  grade, quality, color = "F", "CRITICAL", "πŸ”΄"
510
-
511
- energy = spectral['energy_distribution']
512
-
513
- summary = f"""
514
- # 🎡 Analysis Complete! βœ…
515
 
 
 
 
 
 
 
 
 
516
  ## File Information
517
  - **Filename:** `{audio_data['filename']}`
518
- - **Duration:** {info['duration']:.2f} seconds
519
- - **Sample Rate:** {info['samplerate']:,} Hz
520
- - **Channels:** {info['channels']}
521
  - **Format:** {info['format']} ({info['subtype']})
522
 
523
  ---
524
 
525
- ## Quality Assessment
526
-
527
- ### Overall Score: **{score}/100** - Grade **{grade}** {color}
528
  **Quality Rating:** {quality}
529
 
530
  ### Audio Metrics
531
  | Metric | Value |
532
- |--------|-------|
533
  | Peak Level | {time_stats['peak_db']:.2f} dBFS |
534
  | RMS Level | {time_stats['rms_db']:.2f} dBFS |
535
  | Crest Factor | {time_stats['crest_factor_db']:.2f} dB |
536
  | SNR (Est.) | {time_stats['snr_db']:.1f} dB |
537
  """
538
-
539
  if lufs is not None:
540
  summary += f"| Integrated LUFS | {lufs:.2f} LUFS |\n"
541
-
542
  summary += f"""
543
  ---
544
 
545
- ## Spectral Analysis
546
  | Parameter | Value |
547
- |-----------|-------|
548
  | Spectral Centroid | {spectral['spectral_centroid']:.1f} Hz |
549
  | 85% Rolloff | {spectral['rolloff_85pct']:.1f} Hz |
550
  | 95% Rolloff | {spectral['rolloff_95pct']:.1f} Hz |
551
- | Highest Freq (-60dB) | {spectral['highest_freq_minus60db']:.1f} Hz |
552
 
553
- ### Energy Distribution
554
- - **< 100 Hz:** {energy['below_100hz']:.2f}%
555
- - **100-500 Hz:** {energy['100_500hz']:.2f}%
556
- - **500-2k Hz:** {energy['500_2khz']:.2f}%
557
- - **2k-8k Hz:** {energy['2k_8khz']:.2f}%
558
- - **> 8 kHz:** {energy['above_8khz']:.2f}%
559
- - **> 12 kHz:** {energy['above_12khz']:.2f}%
 
 
560
 
561
  ---
562
 
563
  ## Issues Detected: **{len(issues)}**
564
  """
565
-
566
  if issues:
567
  summary += "\n### ⚠️ Detected Issues:\n\n"
568
- severity_icons = {"CRITICAL": "πŸ”΄", "HIGH": "🟠", "MEDIUM": "🟑", "LOW": "🟒"}
569
-
570
- for issue_type, severity, desc in issues:
571
- icon = severity_icons.get(severity, "βšͺ")
572
- summary += f"{icon} **[{severity}] {issue_type}**\n"
573
  summary += f" - {desc}\n\n"
574
  else:
575
- summary += "\n### βœ… No significant issues detected!\n"
576
-
577
- if spectral['spectral_notches']:
578
- summary += f"\n### 🎡 Spectral Notches: {len(spectral['spectral_notches'])}\n\n"
579
- for i, notch in enumerate(spectral['spectral_notches'][:5], 1):
580
- summary += f"{i}. **{notch['freq']:.1f} Hz** (Depth: {notch['depth_db']:.1f} dB)\n"
581
-
582
- summary += f"\n---\n\nπŸ“Š **Report:** `{output_filename}` | πŸ• **Generated:** {audio_data['timestamp']}\n"
583
-
 
 
 
 
 
 
 
584
  return str(output_path), summary
585
-
586
  except Exception as e:
587
  import traceback
588
  traceback.print_exc()
589
  return None, f"# ❌ Analysis Failed\n\n**Error:** {str(e)}"
590
-
591
-
592
- # ==================== CREATE INTERFACE ====================
593
 
594
  with gr.Blocks(title="Audio Forensic Analyzer") as demo:
595
-
596
  gr.Markdown("""
597
- # 🎡 Audio Forensic Analyzer
598
-
599
- Upload an audio file to perform comprehensive forensic analysis.
600
-
601
- **Detects:** Compression, Filtering, Clipping, Spectral Anomalies, and more.
602
-
603
- **Supported formats:** WAV, MP3, FLAC, OGG, M4A, AAC
 
 
 
 
 
604
  """)
605
-
606
  with gr.Row():
607
  with gr.Column(scale=1):
608
  audio_input = gr.Audio(
@@ -610,28 +782,33 @@ with gr.Blocks(title="Audio Forensic Analyzer") as demo:
610
  type="filepath",
611
  sources=["upload"]
612
  )
613
-
614
  analyze_btn = gr.Button(
615
  "πŸ” Analyze Audio",
616
  variant="primary",
617
  size="lg"
618
  )
619
-
620
  with gr.Column(scale=2):
621
  report_output = gr.Image(
622
  label="πŸ“Š Analysis Report",
623
  type="filepath",
624
  height=600
625
  )
626
-
627
  with gr.Row():
628
  summary_output = gr.Markdown(label="πŸ“‹ Analysis Summary")
629
-
630
  analyze_btn.click(
631
  fn=analyze_audio,
632
  inputs=[audio_input],
633
  outputs=[report_output, summary_output]
634
  )
635
 
 
 
 
 
 
636
  if __name__ == "__main__":
637
- demo.launch()
 
1
+ # ============================================================
2
+ # app.py (Updated Full Version — Chunk 1: Lines 1–300)
3
+ # ============================================================
4
+
5
  import gradio as gr
6
  import sys
7
  from pathlib import Path
 
27
  # ==================== ANALYSIS FUNCTIONS ====================
28
 
29
  def read_audio_info(path):
30
+ """Read audio file metadata"""
31
  info = sf.info(path)
32
  return {
33
  "samplerate": int(info.samplerate),
 
40
 
41
 
42
  def compute_time_domain_stats(y):
43
+ """Calculate time-domain statistics"""
44
  peak = float(np.max(np.abs(y)))
45
+ rms = float(np.sqrt(np.mean(y ** 2)))
46
+
47
  peak_db = 20 * np.log10(max(peak, 1e-12))
48
  rms_db = 20 * np.log10(max(rms, 1e-12))
49
  crest_factor = peak_db - rms_db
50
+
51
  abs_y = np.abs(y)
52
  noise_floor = float(np.percentile(abs_y, 10))
53
  snr_est = 20 * np.log10(max(rms, 1e-12) / max(noise_floor, 1e-12))
54
  zcr = float(np.mean(librosa.feature.zero_crossing_rate(y)))
55
+
56
  return {
57
  "peak": peak,
58
  "rms": rms,
 
65
  }
66
 
67
 
68
+ # ============================================================
69
+ # UPDATED SPECTRAL ANALYSIS FUNCTION (FFT=4096, 90th percentile)
70
+ # ============================================================
71
+
72
+ def compute_spectral_analysis(y, sr, n_fft=4096):
73
+ """Comprehensive spectral analysis tuned for speech QC"""
74
+
75
  hop_length = n_fft // 4
76
 
77
  # STFT
78
+ S = np.abs(librosa.stft(y, n_fft=n_fft, hop_length=hop_length, window="hann"))
79
+ freqs = np.linspace(0, sr / 2, S.shape[0])
80
 
81
+ # Convert amplitude to dB
82
  S_db = librosa.amplitude_to_db(S, ref=np.max)
83
 
84
+ # ===== UPDATED ENERGY ESTIMATE: 90th percentile of power =====
85
+ S_power = S ** 2
86
+ energy = np.percentile(S_power, 90, axis=1) + 1e-20
87
  total_energy = float(np.sum(energy))
 
88
  cum_energy = np.cumsum(energy)
89
+
90
+ # Rolloffs
91
  roll85_idx = np.searchsorted(cum_energy, 0.85 * total_energy)
92
  roll95_idx = np.searchsorted(cum_energy, 0.95 * total_energy)
 
 
93
 
94
+ freq_at_85 = float(freqs[min(roll85_idx, len(freqs) - 1)])
95
+ freq_at_95 = float(freqs[min(roll95_idx, len(freqs) - 1)])
96
+
97
+ # ===== UPDATED HF ENVELOPE: 90th percentile of dB =====
98
  mean_db_per_bin = np.percentile(S_db, 90, axis=1)
99
 
100
  peak_db = float(np.max(S_db))
101
+ threshold_db = peak_db - 60
102
+
103
  non_silent_bins = np.where(mean_db_per_bin > threshold_db)[0]
104
  highest_freq = float(freqs[non_silent_bins[-1]]) if non_silent_bins.size else 0.0
105
 
106
+ # ===================== UPDATED SPEECH-CENTRIC BANDS =====================
107
+ def band_energy(low, high):
108
+ i1 = np.searchsorted(freqs, low)
109
+ i2 = np.searchsorted(freqs, high)
110
+ return float(100 * np.sum(energy[i1:i2]) / total_energy)
111
 
112
+ def band_energy_above(f):
113
  idx = np.searchsorted(freqs, f)
114
+ return float(100 * np.sum(energy[idx:]) / total_energy)
115
 
116
  energy_stats = {
117
+ "below_100hz": band_energy(0, 100),
118
+ "100_500hz": band_energy(100, 500),
119
+ "500_2khz": band_energy(500, 2000),
120
+ "2k_8khz": band_energy(2000, 8000),
121
+ "8k_12khz": band_energy(8000, 12000),
122
+ "12k_16khz": band_energy(12000, 16000),
123
+ "above_16khz": band_energy_above(16000)
 
124
  }
125
 
126
+ # Brickwall detection
127
  diffs = np.diff(mean_db_per_bin)
128
+ big_drop_idx = np.where(diffs < -20)[0]
129
  brick_wall = bool(big_drop_idx.size)
130
  brick_freq = float(freqs[big_drop_idx[0]]) if big_drop_idx.size else None
131
 
132
+ # Spectral notches
133
  smooth = sps.medfilt(mean_db_per_bin, kernel_size=9)
134
  minima = sps.argrelextrema(smooth, np.less)[0]
135
  notches = []
136
+
137
  for m in minima:
138
+ left = smooth[max(0, m - 6):m]
139
+ right = smooth[m + 1:min(len(smooth), m + 7)]
140
+ neighbor_peak = max(
141
  left.max() if left.size else -999,
142
  right.max() if right.size else -999
143
  )
144
+ depth = neighbor_peak - smooth[m]
145
+ if depth >= 15 and freqs[m] > 100:
146
+ notches.append({
147
+ "freq": float(freqs[m]),
148
+ "depth_db": float(depth)
149
+ })
150
+
151
+ # Additional spectral stats
152
  centroid = float(np.mean(librosa.feature.spectral_centroid(S=S, sr=sr)))
153
  bandwidth = float(np.mean(librosa.feature.spectral_bandwidth(S=S, sr=sr)))
154
  flatness = float(np.mean(librosa.feature.spectral_flatness(S=S)))
 
169
  "spectral_centroid": centroid,
170
  "spectral_bandwidth": bandwidth,
171
  "spectral_flatness": flatness,
172
+ "spectral_rolloff": rolloff
173
  }
174
+ # ============================================================
175
+ # UPDATED ISSUE DETECTION (HF thresholds corrected)
176
+ # ============================================================
 
 
 
 
 
 
 
 
 
177
 
178
  def detect_audio_issues(spectral, time_stats):
179
+ """Detect common audio processing artifacts"""
180
  issues = []
181
  energy = spectral["energy_distribution"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
182
 
183
+ # High-pass detection
184
+ if energy["below_100hz"] < 0.5:
185
+ issues.append(("HIGH_PASS_FILTER", "HIGH",
186
+ f"Very low energy <100Hz ({energy['below_100hz']:.2f}%). Possible HPF."))
187
 
188
+ # Updated HF-loss rules (speech-appropriate)
189
+ if energy["8k_12khz"] < 0.05 and spectral["highest_freq_minus60db"] < 8000:
190
+ issues.append(("HF_LOSS", "HIGH",
191
+ f"Severe HF loss. Only {energy['8k_12khz']:.3f}% in 8–12kHz."))
192
+
193
+ elif energy["8k_12khz"] < 0.3:
194
+ issues.append(("HF_LOSS", "MEDIUM",
195
+ f"Reduced HF content ({energy['8k_12khz']:.3f}% in 8–12kHz)."))
196
 
197
+ # Brickwall filter
198
+ if spectral["brick_wall_detected"]:
199
+ issues.append(("BRICK_WALL", "HIGH",
200
+ f"Possible brick-wall at {spectral['brick_wall_freq']:.0f} Hz"))
 
 
201
 
202
+ # Spectral notches
203
+ if len(spectral["spectral_notches"]) > 0:
204
+ issues.append(("SPECTRAL_NOTCHES", "MEDIUM",
205
+ f"{len(spectral['spectral_notches'])} spectral notches detected."))
206
+
207
+ # Compression
208
+ if time_stats["crest_factor_db"] < 3:
209
+ issues.append(("OVER_COMPRESSION", "HIGH",
210
+ f"Very low crest factor {time_stats['crest_factor_db']:.1f} dB"))
211
+ elif time_stats["crest_factor_db"] < 6:
212
+ issues.append(("COMPRESSION", "MEDIUM",
213
+ f"Low crest factor {time_stats['crest_factor_db']:.1f} dB"))
214
+
215
+ # Clipping
216
  if time_stats["peak"] >= 0.999:
217
+ issues.append(("CLIPPING", "CRITICAL",
218
+ f"Peak amplitude {time_stats['peak']:.6f}. Possible clipping."))
219
 
220
  return issues
221
 
222
+
223
+ # ============================================================
224
+ # REPORT GENERATION
225
+ # ============================================================
226
+
227
  def create_report(audio_data, output_path):
228
  """Create comprehensive PNG report"""
229
+
230
+ plt.style.use("default")
231
+
232
+ # UPDATED FIGURE SIZE
233
+ fig = plt.figure(figsize=(22, 16))
234
+ fig.patch.set_facecolor("white")
235
+
236
+ fig.suptitle(
237
+ f"AUDIO FORENSIC ANALYSIS REPORT\n{audio_data['filename']}",
238
+ fontsize=20,
239
+ fontweight="bold",
240
+ y=0.97
241
+ )
242
+
243
+ gs = gridspec.GridSpec(
244
+ 4, 4,
245
+ figure=fig,
246
+ hspace=0.4,
247
+ wspace=0.4,
248
+ height_ratios=[1.5, 1, 0.8, 0.9],
249
+ left=0.05,
250
+ right=0.95,
251
+ top=0.92,
252
+ bottom=0.05
253
+ )
254
+
255
+ # ============================
256
+ # SPECTROGRAM PLOT (UPDATED)
257
+ # ============================
258
+
259
  ax_spec = fig.add_subplot(gs[0, :])
260
+
261
+ S_db = audio_data["spectral"]["S_db"]
262
+ sr = audio_data["info"]["samplerate"]
263
+ hop = audio_data["spectral"]["hop_length"]
264
+
265
  img = librosa.display.specshow(
266
+ S_db,
267
+ sr=sr,
268
+ hop_length=hop,
269
+ y_axis="hz",
270
+ x_axis="time",
271
+ cmap="viridis",
272
+ ax=ax_spec,
273
+ vmin=-80,
274
+ vmax=0
275
  )
276
+
277
+ ax_spec.set_title("Spectrogram", fontsize=14, fontweight="bold", pad=10)
278
+ ax_spec.set_ylabel("Frequency (Hz)", fontsize=11, fontweight="bold")
279
+ ax_spec.set_xlabel("Time (seconds)", fontsize=11, fontweight="bold")
280
+ ax_spec.grid(True, alpha=0.3, linestyle="--", linewidth=0.5)
281
+
282
+ cbar = plt.colorbar(img, ax=ax_spec, format="%+2.0f dB", pad=0.01)
283
  cbar.ax.tick_params(labelsize=10)
284
+ cbar.set_label("Magnitude (dB)", fontsize=10, fontweight="bold")
285
+
286
+ # ============================
287
+ # FILE INFO BLOCK
288
+ # ============================
289
+
290
  ax_info = fig.add_subplot(gs[1, 0:2])
291
+ ax_info.axis("off")
292
+
293
+ info = audio_data["info"]
294
+ time = audio_data["time_stats"]
295
+
296
  info_lines = [
297
  "FILE INFORMATION",
298
  "─" * 50,
299
  f"Sample Rate: {info['samplerate']:,} Hz",
300
  f"Channels: {info['channels']}",
301
+ f"Duration: {info['duration']:.2f} sec",
302
  f"Format: {info['format']} ({info['subtype']})",
303
  f"Total Frames: {info['frames']:,}",
304
  "",
305
  "TIME-DOMAIN ANALYSIS",
306
  "─" * 50,
307
+ f"Peak Level: {time['peak_db']:.2f} dBFS ({time['peak']:.6f})",
308
+ f"RMS Level: {time['rms_db']:.2f} dBFS ({time['rms']:.6f})",
309
  f"Crest Factor: {time['crest_factor_db']:.2f} dB",
310
  f"Noise Floor: {time['noise_floor']:.6f}",
311
  f"Est. SNR: {time['snr_db']:.1f} dB",
312
  f"Zero Cross Rate: {time['zero_crossing_rate']:.4f}",
313
  ]
314
+
315
+ if audio_data.get("lufs") is not None:
316
  info_lines.extend([
317
  "",
318
  "LOUDNESS (BS.1770)",
319
  "─" * 50,
320
  f"Integrated LUFS: {audio_data['lufs']:.2f} LUFS"
321
  ])
322
+
323
  info_text = "\n".join(info_lines)
324
+
325
+ ax_info.text(
326
+ 0.05, 0.95, info_text,
327
+ transform=ax_info.transAxes,
328
+ fontsize=11,
329
+ verticalalignment="top",
330
+ family="monospace",
331
+ bbox=dict(
332
+ boxstyle="round,pad=1",
333
+ facecolor="#E8F4F8",
334
+ edgecolor="#0077BE",
335
+ linewidth=2
336
+ )
337
+ )
338
+ # ============================
339
+ # SPECTRAL STATS PANEL
340
+ # ============================
341
+
342
  ax_spectral = fig.add_subplot(gs[1, 2:4])
343
+ ax_spectral.axis("off")
344
+
345
+ spec = audio_data["spectral"]
346
+ energy = spec["energy_distribution"]
347
+
348
  spectral_lines = [
349
  "SPECTRAL ANALYSIS",
350
  "─" * 50,
 
353
  f"Flatness: {spec['spectral_flatness']:.4f}",
354
  f"Rolloff: {spec['spectral_rolloff']:.1f} Hz",
355
  "",
356
+ "FREQUENCY ROLLOFF POINTS",
357
  "─" * 50,
358
  f"85% Energy: {spec['rolloff_85pct']:.1f} Hz",
359
  f"95% Energy: {spec['rolloff_95pct']:.1f} Hz",
360
  f"Highest (-60dB): {spec['highest_freq_minus60db']:.1f} Hz",
361
  "",
362
+ "ENERGY DISTRIBUTION (Speech Bands)",
363
  "─" * 50,
364
  f"< 100 Hz: {energy['below_100hz']:.2f}%",
365
+ f"100–500 Hz: {energy['100_500hz']:.2f}%",
366
+ f"500–2k Hz: {energy['500_2khz']:.2f}%",
367
+ f"2k–8k Hz: {energy['2k_8khz']:.2f}%",
368
+ f"8k–12k Hz: {energy['8k_12khz']:.2f}%",
369
+ f"12k–16k Hz: {energy['12k_16khz']:.2f}%",
370
+ f"> 16k Hz: {energy['above_16khz']:.2f}%",
371
  ]
372
+
373
  spectral_text = "\n".join(spectral_lines)
374
+
375
+ ax_spectral.text(
376
+ 0.05, 0.95, spectral_text,
377
+ transform=ax_spectral.transAxes,
378
+ fontsize=11,
379
+ verticalalignment="top",
380
+ family="monospace",
381
+ bbox=dict(
382
+ boxstyle="round,pad=1",
383
+ facecolor="#FFF4E6",
384
+ edgecolor="#FF8C00",
385
+ linewidth=2
386
+ )
387
+ )
388
+
389
+
390
+ # ============================
391
+ # ENERGY DISTRIBUTION BAR CHART
392
+ # ============================
393
+
394
  ax_energy = fig.add_subplot(gs[2, :])
395
+
396
+ bands = [
397
+ "<100Hz",
398
+ "100–500Hz",
399
+ "500–2kHz",
400
+ "2k–8kHz",
401
+ "8k–12kHz",
402
+ "12k–16kHz",
403
+ ">16kHz"
404
+ ]
405
+
406
  values = [
407
+ energy["below_100hz"],
408
+ energy["100_500hz"],
409
+ energy["500_2khz"],
410
+ energy["2k_8khz"],
411
+ energy["8k_12khz"],
412
+ energy["12k_16khz"],
413
+ energy["above_16khz"]
414
  ]
415
+
416
+ colors = [
417
+ "#2C3E50",
418
+ "#E74C3C",
419
+ "#E67E22",
420
+ "#F39C12",
421
+ "#2ECC71",
422
+ "#3498DB",
423
+ "#9B59B6"
424
+ ]
425
+
426
+ bars = ax_energy.bar(
427
+ bands, values,
428
+ color=colors,
429
+ edgecolor="black",
430
+ linewidth=1.5,
431
+ alpha=0.85
432
+ )
433
+
434
+ ax_energy.set_ylabel("Energy Percentage (%)", fontsize=12, fontweight="bold")
435
+ ax_energy.set_title("Frequency Band Energy Distribution", fontsize=13, fontweight="bold", pad=10)
436
+ ax_energy.grid(axis="y", alpha=0.4, linestyle="--", linewidth=0.8)
437
+ ax_energy.set_ylim(0, max(values) * 1.15 if max(values) > 0 else 1)
438
  ax_energy.set_axisbelow(True)
439
+
440
  for bar, val in zip(bars, values):
441
  height = bar.get_height()
442
+ ax_energy.text(
443
+ bar.get_x() + bar.get_width() / 2., height + 0.5,
444
+ f"{val:.2f}%",
445
+ ha="center",
446
+ va="bottom",
447
+ fontsize=10,
448
+ fontweight="bold"
449
+ )
450
+
451
+
452
+ # ============================
453
  # ISSUES PANEL
454
+ # ============================
455
+
456
  ax_issues = fig.add_subplot(gs[3, 0:3])
457
+ ax_issues.axis("off")
458
+
459
+ issues = audio_data["issues"]
460
+
461
+ issue_lines = [
462
+ "DETECTED ISSUES & WARNINGS",
463
+ "═" * 80
464
+ ]
465
+
466
  if not issues:
467
+ issue_lines.append("✅ No significant issues detected.")
468
  else:
469
  severity_icons = {
470
  "CRITICAL": "🔴 CRITICAL",
 
472
  "MEDIUM": "🟡 MEDIUM",
473
  "LOW": "🟢 LOW"
474
  }
475
+
476
  for issue_type, severity, description in issues:
477
  icon = severity_icons.get(severity, "⚪ INFO")
478
+ issue_lines.append(f"\n{icon} — {issue_type}")
479
  issue_lines.append(f" → {description}")
480
+
481
+ # If spectral notches exist, list them
482
+ if spec["spectral_notches"]:
483
  issue_lines.append(f"\n🎵 SPECTRAL NOTCHES DETECTED: {len(spec['spectral_notches'])}")
484
+ for i, notch in enumerate(spec["spectral_notches"][:5], start=1):
485
+ issue_lines.append(
486
+ f" {i}. Frequency: {notch['freq']:.1f} Hz, Depth: {notch['depth_db']:.1f} dB"
487
+ )
488
+ if len(spec["spectral_notches"]) > 5:
489
+ issue_lines.append(f" ... and {len(spec['spectral_notches']) - 5} more")
490
+
491
+ # Brickwall detection notice
492
+ if spec["brick_wall_detected"]:
493
  issue_lines.append(f"\n⚠️ BRICK-WALL FILTER: Detected at {spec['brick_wall_freq']:.0f} Hz")
494
+
495
  issues_text = "\n".join(issue_lines)
496
+
497
+ ax_issues.text(
498
+ 0.05, 0.95, issues_text,
499
+ transform=ax_issues.transAxes,
500
+ fontsize=11,
501
+ verticalalignment="top",
502
+ family="monospace",
503
+ bbox=dict(
504
+ boxstyle="round,pad=1",
505
+ facecolor="#FFE6E6",
506
+ edgecolor="#DC143C",
507
+ linewidth=2
508
+ )
509
+ )
510
+ # ============================
511
+ # QUALITY SCORE PANEL
512
+ # ============================
513
+
514
  ax_score = fig.add_subplot(gs[3, 3])
515
+ ax_score.axis("off")
516
+
517
+ issues = audio_data["issues"]
518
+
519
+ # Score penalties
520
+ critical = sum(1 for _, sev, _ in issues if sev == "CRITICAL")
521
+ high = sum(1 for _, sev, _ in issues if sev == "HIGH")
522
+ medium = sum(1 for _, sev, _ in issues if sev == "MEDIUM")
523
+
524
  score = 100
 
 
 
 
525
  score -= critical * 30
526
  score -= high * 15
527
  score -= medium * 5
528
  score = max(0, score)
529
+
530
+ # Grade + Color
531
  if score >= 90:
532
+ grade, quality, color = "A", "EXCELLENT", "#00C853"
533
  elif score >= 75:
534
+ grade, quality, color = "B", "GOOD", "#64DD17"
535
  elif score >= 60:
536
+ grade, quality, color = "C", "FAIR", "#FFD600"
537
  elif score >= 40:
538
+ grade, quality, color = "D", "POOR", "#FF6D00"
539
  else:
540
+ grade, quality, color = "F", "CRITICAL", "#D50000"
541
+
542
  score_lines = [
543
  "QUALITY ASSESSMENT",
544
  "═" * 28,
 
554
  f"🟡 Medium: {medium}",
555
  "",
556
  "─" * 28,
557
+ "Generated:",
558
  f"{audio_data['timestamp']}"
559
  ]
560
+
561
  score_text = "\n".join(score_lines)
 
 
 
 
 
 
 
 
 
 
 
562
 
563
+ ax_score.text(
564
+ 0.5, 0.5, score_text,
565
+ transform=ax_score.transAxes,
566
+ fontsize=11,
567
+ ha="center",
568
+ va="center",
569
+ family="monospace",
570
+ bbox=dict(
571
+ boxstyle="round,pad=1.2",
572
+ facecolor=color,
573
+ edgecolor="black",
574
+ linewidth=3,
575
+ alpha=0.75
576
+ ),
577
+ fontweight="bold"
578
+ )
579
 
580
+ # ============================
581
+ # SAVE REPORT
582
+ # ============================
583
+
584
+ plt.savefig(
585
+ output_path,
586
+ dpi=300,
587
+ bbox_inches="tight",
588
+ facecolor="white",
589
+ edgecolor="none"
590
+ )
591
+ plt.close()
592
+
593
+ return output_path
594
+ # ============================================================
595
+ # MAIN ANALYSIS FUNCTION (GRADIO CALLBACK)
596
+ # ============================================================
597
 
598
  def analyze_audio(audio_file, progress=gr.Progress()):
599
+ """Analyze uploaded audio file."""
600
  if audio_file is None:
601
  return None, "⚠️ Please upload an audio file to analyze."
602
+
603
  try:
604
  progress(0.1, desc="Reading audio file...")
605
+
606
  output_dir = Path("reports")
607
  output_dir.mkdir(exist_ok=True)
608
+
609
  path = Path(audio_file)
610
+
611
  progress(0.2, desc="Loading audio data...")
612
  info = read_audio_info(str(path))
613
  y, sr = librosa.load(str(path), sr=None, mono=True)
614
+
615
  progress(0.4, desc="Analyzing time-domain...")
616
  time_stats = compute_time_domain_stats(y)
617
+
618
  progress(0.6, desc="Performing spectral analysis...")
619
  spectral = compute_spectral_analysis(y, sr)
620
+
621
  progress(0.7, desc="Computing loudness...")
622
  lufs = compute_loudness(y, sr) if LOUDNESS_AVAILABLE else None
623
+
624
  progress(0.8, desc="Detecting audio issues...")
625
  issues = detect_audio_issues(spectral, time_stats)
626
+
627
  audio_data = {
628
  "filename": path.name,
629
  "info": info,
 
633
  "issues": issues,
634
  "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
635
  }
636
+
637
  progress(0.9, desc="Generating report...")
638
+
639
  output_filename = path.stem + "_report.png"
640
  output_path = output_dir / output_filename
641
+
642
  create_report(audio_data, str(output_path))
643
+
644
  progress(1.0, desc="Complete!")
645
+
646
+ # ============================
647
+ # SCORE COMPUTATION
648
+ # ============================
649
+
650
+ critical = sum(1 for _, sev, _ in issues if sev == "CRITICAL")
651
+ high = sum(1 for _, sev, _ in issues if sev == "HIGH")
652
+ medium = sum(1 for _, sev, _ in issues if sev == "MEDIUM")
653
+
654
  score = 100 - (critical * 30) - (high * 15) - (medium * 5)
655
  score = max(0, score)
656
+
657
  if score >= 90:
658
  grade, quality, color = "A", "EXCELLENT", "🟢"
659
  elif score >= 75:
 
664
  grade, quality, color = "D", "POOR", "🟠"
665
  else:
666
  grade, quality, color = "F", "CRITICAL", "🔴"
 
 
 
 
 
667
 
668
+ energy = spectral["energy_distribution"]
669
+
670
+ # ============================
671
+ # SUMMARY OUTPUT (Markdown)
672
+ # ============================
673
+
674
+ summary = f"""
675
+ # 🎵 Analysis Complete!
676
  ## File Information
677
  - **Filename:** `{audio_data['filename']}`
678
+ - **Duration:** {info['duration']:.2f} sec
679
+ - **Sample Rate:** {info['samplerate']:,} Hz
680
+ - **Channels:** {info['channels']}
681
  - **Format:** {info['format']} ({info['subtype']})
682
 
683
  ---
684
 
685
+ ## Quality Assessment
686
+ ### Overall Score: **{score}/100** — Grade **{grade}** {color}
 
687
  **Quality Rating:** {quality}
688
 
689
  ### Audio Metrics
690
  | Metric | Value |
691
+ |--------|--------|
692
  | Peak Level | {time_stats['peak_db']:.2f} dBFS |
693
  | RMS Level | {time_stats['rms_db']:.2f} dBFS |
694
  | Crest Factor | {time_stats['crest_factor_db']:.2f} dB |
695
  | SNR (Est.) | {time_stats['snr_db']:.1f} dB |
696
  """
697
+
698
  if lufs is not None:
699
  summary += f"| Integrated LUFS | {lufs:.2f} LUFS |\n"
700
+
701
  summary += f"""
702
  ---
703
 
704
+ ## Spectral Analysis
705
  | Parameter | Value |
706
+ |-----------|--------|
707
  | Spectral Centroid | {spectral['spectral_centroid']:.1f} Hz |
708
  | 85% Rolloff | {spectral['rolloff_85pct']:.1f} Hz |
709
  | 95% Rolloff | {spectral['rolloff_95pct']:.1f} Hz |
710
+ | Highest Freq (–60 dB) | {spectral['highest_freq_minus60db']:.1f} Hz |
711
 
712
+ ### Energy Distribution (Speech Bands)
713
+
714
+ - **<100 Hz:** {energy['below_100hz']:.2f}%
715
+ - **100–500 Hz:** {energy['100_500hz']:.2f}%
716
+ - **500–2k Hz:** {energy['500_2khz']:.2f}%
717
+ - **2k–8k Hz:** {energy['2k_8khz']:.2f}%
718
+ - **8k–12k Hz:** {energy['8k_12khz']:.2f}%
719
+ - **12k–16k Hz:** {energy['12k_16khz']:.2f}%
720
+ - **>16k Hz:** {energy['above_16khz']:.2f}%
721
 
722
  ---
723
 
724
  ## Issues Detected: **{len(issues)}**
725
  """
726
+
727
  if issues:
728
  summary += "\n### ⚠️ Detected Issues:\n\n"
729
+ icons = {"CRITICAL": "🔴", "HIGH": "🟠", "MEDIUM": "🟡", "LOW": "🟢"}
730
+
731
+ for issue_type, sev, desc in issues:
732
+ summary += f"{icons.get(sev,'⚪')} **[{sev}] {issue_type}**\n"
 
733
  summary += f" - {desc}\n\n"
734
  else:
735
+ summary += "\n### ✅ No significant issues detected.\n"
736
+
737
+ if spectral["spectral_notches"]:
738
+ summary += f"\n### 🎵 Spectral Notches: {len(spectral['spectral_notches'])}\n"
739
+ for i, n in enumerate(spectral["spectral_notches"][:5], 1):
740
+ summary += f"{i}. **{n['freq']:.1f} Hz** (Depth: {n['depth_db']:.1f} dB)\n"
741
+
742
+ summary += f"""
743
+
744
+ ---
745
+
746
+ 📊 **Report File:** `{output_filename}`
747
+ 🕒 **Generated:** {audio_data['timestamp']}
748
+
749
+ """
750
+
751
  return str(output_path), summary
752
+
753
  except Exception as e:
754
  import traceback
755
  traceback.print_exc()
756
  return None, f"# ❌ Analysis Failed\n\n**Error:** {str(e)}"
757
+ # ============================================================
758
+ # ============== GRADIO USER INTERFACE =====================
759
+ # ============================================================
760
 
761
  with gr.Blocks(title="Audio Forensic Analyzer") as demo:
762
+
763
  gr.Markdown("""
764
+ # 🎵 Audio Forensic Analyzer
765
+ Upload an audio file to perform detailed forensic-level analysis.
766
+
767
+ This tool evaluates:
768
+ - Spectrum balance
769
+ - HF rolloff & filtering
770
+ - Compression
771
+ - Clipping
772
+ - Noise levels
773
+ - Spectral anomalies (notches, brickwalls)
774
+
775
+ **Supported formats:** WAV, MP3, FLAC, OGG, M4A, AAC
776
  """)
777
+
778
  with gr.Row():
779
  with gr.Column(scale=1):
780
  audio_input = gr.Audio(
 
782
  type="filepath",
783
  sources=["upload"]
784
  )
785
+
786
  analyze_btn = gr.Button(
787
  "🔍 Analyze Audio",
788
  variant="primary",
789
  size="lg"
790
  )
791
+
792
  with gr.Column(scale=2):
793
  report_output = gr.Image(
794
  label="📊 Analysis Report",
795
  type="filepath",
796
  height=600
797
  )
798
+
799
  with gr.Row():
800
  summary_output = gr.Markdown(label="📋 Analysis Summary")
801
+
802
  analyze_btn.click(
803
  fn=analyze_audio,
804
  inputs=[audio_input],
805
  outputs=[report_output, summary_output]
806
  )
807
 
808
+
809
+ # ============================================================
810
+ # ============== APP LAUNCH ================================
811
+ # ============================================================
812
+
813
  if __name__ == "__main__":
814
+ demo.launch()