Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,3 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
import sys
|
| 3 |
from pathlib import Path
|
|
@@ -23,6 +27,7 @@ except ImportError:
|
|
| 23 |
# ==================== ANALYSIS FUNCTIONS ====================
|
| 24 |
|
| 25 |
def read_audio_info(path):
|
|
|
|
| 26 |
info = sf.info(path)
|
| 27 |
return {
|
| 28 |
"samplerate": int(info.samplerate),
|
|
@@ -35,18 +40,19 @@ def read_audio_info(path):
|
|
| 35 |
|
| 36 |
|
| 37 |
def compute_time_domain_stats(y):
|
|
|
|
| 38 |
peak = float(np.max(np.abs(y)))
|
| 39 |
-
rms = float(np.sqrt(np.mean(y**2)))
|
| 40 |
-
|
| 41 |
peak_db = 20 * np.log10(max(peak, 1e-12))
|
| 42 |
rms_db = 20 * np.log10(max(rms, 1e-12))
|
| 43 |
crest_factor = peak_db - rms_db
|
| 44 |
-
|
| 45 |
abs_y = np.abs(y)
|
| 46 |
noise_floor = float(np.percentile(abs_y, 10))
|
| 47 |
snr_est = 20 * np.log10(max(rms, 1e-12) / max(noise_floor, 1e-12))
|
| 48 |
zcr = float(np.mean(librosa.feature.zero_crossing_rate(y)))
|
| 49 |
-
|
| 50 |
return {
|
| 51 |
"peak": peak,
|
| 52 |
"rms": rms,
|
|
@@ -59,76 +65,90 @@ def compute_time_domain_stats(y):
|
|
| 59 |
}
|
| 60 |
|
| 61 |
|
| 62 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63 |
hop_length = n_fft // 4
|
| 64 |
|
| 65 |
# STFT
|
| 66 |
-
S = np.abs(librosa.stft(y, n_fft=n_fft, hop_length=hop_length, window=
|
| 67 |
-
freqs = np.linspace(0, sr/2, S.shape[0])
|
| 68 |
|
| 69 |
-
#
|
| 70 |
S_db = librosa.amplitude_to_db(S, ref=np.max)
|
| 71 |
|
| 72 |
-
# =====
|
| 73 |
-
S_power = S**2
|
| 74 |
-
energy = np.percentile(S_power,
|
| 75 |
total_energy = float(np.sum(energy))
|
| 76 |
-
|
| 77 |
cum_energy = np.cumsum(energy)
|
|
|
|
|
|
|
| 78 |
roll85_idx = np.searchsorted(cum_energy, 0.85 * total_energy)
|
| 79 |
roll95_idx = np.searchsorted(cum_energy, 0.95 * total_energy)
|
| 80 |
-
freq_at_85 = float(freqs[min(roll85_idx, len(freqs)-1)])
|
| 81 |
-
freq_at_95 = float(freqs[min(roll95_idx, len(freqs)-1)])
|
| 82 |
|
| 83 |
-
|
|
|
|
|
|
|
|
|
|
| 84 |
mean_db_per_bin = np.percentile(S_db, 90, axis=1)
|
| 85 |
|
| 86 |
peak_db = float(np.max(S_db))
|
| 87 |
-
threshold_db = peak_db - 60
|
|
|
|
| 88 |
non_silent_bins = np.where(mean_db_per_bin > threshold_db)[0]
|
| 89 |
highest_freq = float(freqs[non_silent_bins[-1]]) if non_silent_bins.size else 0.0
|
| 90 |
|
| 91 |
-
#
|
| 92 |
-
def
|
| 93 |
-
|
| 94 |
-
|
|
|
|
| 95 |
|
| 96 |
-
def
|
| 97 |
idx = np.searchsorted(freqs, f)
|
| 98 |
-
return float(100
|
| 99 |
|
| 100 |
energy_stats = {
|
| 101 |
-
"below_100hz":
|
| 102 |
-
"
|
| 103 |
-
"
|
| 104 |
-
"
|
| 105 |
-
"
|
| 106 |
-
"
|
| 107 |
-
"
|
| 108 |
-
"above_16khz": energy_above(16000),
|
| 109 |
}
|
| 110 |
|
| 111 |
-
#
|
| 112 |
diffs = np.diff(mean_db_per_bin)
|
| 113 |
-
big_drop_idx = np.where(diffs < -20
|
| 114 |
brick_wall = bool(big_drop_idx.size)
|
| 115 |
brick_freq = float(freqs[big_drop_idx[0]]) if big_drop_idx.size else None
|
| 116 |
|
| 117 |
-
# Spectral notches
|
| 118 |
smooth = sps.medfilt(mean_db_per_bin, kernel_size=9)
|
| 119 |
minima = sps.argrelextrema(smooth, np.less)[0]
|
| 120 |
notches = []
|
|
|
|
| 121 |
for m in minima:
|
| 122 |
-
left = smooth[max(0, m-6):m]
|
| 123 |
-
right = smooth[m+1:min(len(smooth), m+7)]
|
| 124 |
-
|
| 125 |
left.max() if left.size else -999,
|
| 126 |
right.max() if right.size else -999
|
| 127 |
)
|
| 128 |
-
depth =
|
| 129 |
-
if depth >= 15
|
| 130 |
-
notches.append({
|
| 131 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 132 |
centroid = float(np.mean(librosa.feature.spectral_centroid(S=S, sr=sr)))
|
| 133 |
bandwidth = float(np.mean(librosa.feature.spectral_bandwidth(S=S, sr=sr)))
|
| 134 |
flatness = float(np.mean(librosa.feature.spectral_flatness(S=S)))
|
|
@@ -149,139 +169,182 @@ def compute_spectral_analysis(y, sr, n_fft=8192):
|
|
| 149 |
"spectral_centroid": centroid,
|
| 150 |
"spectral_bandwidth": bandwidth,
|
| 151 |
"spectral_flatness": flatness,
|
| 152 |
-
"spectral_rolloff": rolloff
|
| 153 |
}
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
if not LOUDNESS_AVAILABLE:
|
| 158 |
-
return None
|
| 159 |
-
try:
|
| 160 |
-
meter = pyln.Meter(sr)
|
| 161 |
-
loudness = float(meter.integrated_loudness(y))
|
| 162 |
-
return loudness
|
| 163 |
-
except Exception:
|
| 164 |
-
return None
|
| 165 |
-
|
| 166 |
|
| 167 |
def detect_audio_issues(spectral, time_stats):
|
|
|
|
| 168 |
issues = []
|
| 169 |
energy = spectral["energy_distribution"]
|
| 170 |
-
|
| 171 |
-
if energy["below_200hz"] < 2.0:
|
| 172 |
-
issues.append(("HIGH_PASS_FILTER", "HIGH",
|
| 173 |
-
f"Very low energy below 200Hz ({energy['below_200hz']:.2f}%). Likely HPF applied."))
|
| 174 |
-
elif energy["below_200hz"] < 5.0:
|
| 175 |
-
issues.append(("HIGH_PASS_FILTER", "MEDIUM",
|
| 176 |
-
f"Low energy below 200Hz ({energy['below_200hz']:.2f}%). Possible mild HPF."))
|
| 177 |
-
|
| 178 |
-
if energy["above_12khz"] < 0.2 and spectral["highest_freq_minus60db"] < 12000:
|
| 179 |
-
issues.append(("HF_LOSS", "HIGH",
|
| 180 |
-
f"Severe HF loss. Only {energy['above_12khz']:.3f}% above 12kHz."))
|
| 181 |
-
elif energy["above_12khz"] < 1.0:
|
| 182 |
-
issues.append(("HF_LOSS", "MEDIUM",
|
| 183 |
-
f"Reduced HF content ({energy['above_12khz']:.2f}% above 12kHz)."))
|
| 184 |
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
|
|
|
| 188 |
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 192 |
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
issues.append(("COMPRESSION", "MEDIUM",
|
| 198 |
-
f"Low crest factor ({time_stats['crest_factor_db']:.1f}dB). Moderate compression."))
|
| 199 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 200 |
if time_stats["peak"] >= 0.999:
|
| 201 |
-
issues.append(("CLIPPING", "CRITICAL",
|
| 202 |
-
|
| 203 |
|
| 204 |
return issues
|
| 205 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 206 |
def create_report(audio_data, output_path):
|
| 207 |
"""Create comprehensive PNG report"""
|
| 208 |
-
|
| 209 |
-
plt.style.use(
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
fig.
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 221 |
ax_spec = fig.add_subplot(gs[0, :])
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
|
|
|
| 226 |
img = librosa.display.specshow(
|
| 227 |
-
S_db,
|
| 228 |
-
|
| 229 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 230 |
)
|
| 231 |
-
|
| 232 |
-
ax_spec.
|
| 233 |
-
ax_spec.
|
| 234 |
-
ax_spec.
|
| 235 |
-
|
| 236 |
-
|
|
|
|
| 237 |
cbar.ax.tick_params(labelsize=10)
|
| 238 |
-
cbar.set_label(
|
| 239 |
-
|
| 240 |
-
#
|
|
|
|
|
|
|
|
|
|
| 241 |
ax_info = fig.add_subplot(gs[1, 0:2])
|
| 242 |
-
ax_info.axis(
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
|
|
|
|
| 246 |
info_lines = [
|
| 247 |
"FILE INFORMATION",
|
| 248 |
"β" * 50,
|
| 249 |
f"Sample Rate: {info['samplerate']:,} Hz",
|
| 250 |
f"Channels: {info['channels']}",
|
| 251 |
-
f"Duration: {info['duration']:.2f}
|
| 252 |
f"Format: {info['format']} ({info['subtype']})",
|
| 253 |
f"Total Frames: {info['frames']:,}",
|
| 254 |
"",
|
| 255 |
"TIME-DOMAIN ANALYSIS",
|
| 256 |
"β" * 50,
|
| 257 |
-
f"Peak Level: {time['peak_db']:.2f} dBFS
|
| 258 |
-
f"RMS Level: {time['rms_db']:.2f} dBFS
|
| 259 |
f"Crest Factor: {time['crest_factor_db']:.2f} dB",
|
| 260 |
f"Noise Floor: {time['noise_floor']:.6f}",
|
| 261 |
f"Est. SNR: {time['snr_db']:.1f} dB",
|
| 262 |
f"Zero Cross Rate: {time['zero_crossing_rate']:.4f}",
|
| 263 |
]
|
| 264 |
-
|
| 265 |
-
if audio_data.get(
|
| 266 |
info_lines.extend([
|
| 267 |
"",
|
| 268 |
"LOUDNESS (BS.1770)",
|
| 269 |
"β" * 50,
|
| 270 |
f"Integrated LUFS: {audio_data['lufs']:.2f} LUFS"
|
| 271 |
])
|
| 272 |
-
|
| 273 |
info_text = "\n".join(info_lines)
|
| 274 |
-
|
| 275 |
-
|
| 276 |
-
|
| 277 |
-
|
| 278 |
-
|
| 279 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 280 |
ax_spectral = fig.add_subplot(gs[1, 2:4])
|
| 281 |
-
ax_spectral.axis(
|
| 282 |
-
|
| 283 |
-
|
| 284 |
-
|
|
|
|
| 285 |
spectral_lines = [
|
| 286 |
"SPECTRAL ANALYSIS",
|
| 287 |
"β" * 50,
|
|
@@ -290,68 +353,118 @@ def create_report(audio_data, output_path):
|
|
| 290 |
f"Flatness: {spec['spectral_flatness']:.4f}",
|
| 291 |
f"Rolloff: {spec['spectral_rolloff']:.1f} Hz",
|
| 292 |
"",
|
| 293 |
-
"FREQUENCY
|
| 294 |
"β" * 50,
|
| 295 |
f"85% Energy: {spec['rolloff_85pct']:.1f} Hz",
|
| 296 |
f"95% Energy: {spec['rolloff_95pct']:.1f} Hz",
|
| 297 |
f"Highest (-60dB): {spec['highest_freq_minus60db']:.1f} Hz",
|
| 298 |
"",
|
| 299 |
-
"ENERGY DISTRIBUTION
|
| 300 |
"β" * 50,
|
| 301 |
f"< 100 Hz: {energy['below_100hz']:.2f}%",
|
| 302 |
-
f"100
|
| 303 |
-
f"500
|
| 304 |
-
f"2k
|
| 305 |
-
f"
|
| 306 |
-
f"
|
| 307 |
-
f">
|
| 308 |
]
|
| 309 |
-
|
| 310 |
spectral_text = "\n".join(spectral_lines)
|
| 311 |
-
|
| 312 |
-
|
| 313 |
-
|
| 314 |
-
|
| 315 |
-
|
| 316 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 317 |
ax_energy = fig.add_subplot(gs[2, :])
|
| 318 |
-
|
| 319 |
-
bands = [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 320 |
values = [
|
| 321 |
-
energy[
|
| 322 |
-
energy[
|
| 323 |
-
energy[
|
| 324 |
-
energy[
|
| 325 |
-
energy[
|
| 326 |
-
energy[
|
| 327 |
-
energy[
|
| 328 |
]
|
| 329 |
-
|
| 330 |
-
colors = [
|
| 331 |
-
|
| 332 |
-
|
| 333 |
-
|
| 334 |
-
|
| 335 |
-
|
| 336 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 337 |
ax_energy.set_axisbelow(True)
|
| 338 |
-
|
| 339 |
for bar, val in zip(bars, values):
|
| 340 |
height = bar.get_height()
|
| 341 |
-
ax_energy.text(
|
| 342 |
-
|
| 343 |
-
|
| 344 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 345 |
# ISSUES PANEL
|
|
|
|
|
|
|
| 346 |
ax_issues = fig.add_subplot(gs[3, 0:3])
|
| 347 |
-
ax_issues.axis(
|
| 348 |
-
|
| 349 |
-
issues = audio_data[
|
| 350 |
-
|
| 351 |
-
issue_lines = [
|
| 352 |
-
|
|
|
|
|
|
|
|
|
|
| 353 |
if not issues:
|
| 354 |
-
issue_lines.append("✅ No significant issues detected
|
| 355 |
else:
|
| 356 |
severity_icons = {
|
| 357 |
"CRITICAL": "π΄ CRITICAL",
|
|
@@ -359,53 +472,73 @@ def create_report(audio_data, output_path):
|
|
| 359 |
"MEDIUM": "π‘ MEDIUM",
|
| 360 |
"LOW": "π’ LOW"
|
| 361 |
}
|
| 362 |
-
|
| 363 |
for issue_type, severity, description in issues:
|
| 364 |
icon = severity_icons.get(severity, "⚪ INFO")
|
| 365 |
-
issue_lines.append(f"\n{icon}
|
| 366 |
issue_lines.append(f" β {description}")
|
| 367 |
-
|
| 368 |
-
|
|
|
|
| 369 |
issue_lines.append(f"\nπ΅ SPECTRAL NOTCHES DETECTED: {len(spec['spectral_notches'])}")
|
| 370 |
-
for i, notch in enumerate(spec[
|
| 371 |
-
issue_lines.append(
|
| 372 |
-
|
| 373 |
-
|
| 374 |
-
|
| 375 |
-
|
|
|
|
|
|
|
|
|
|
| 376 |
issue_lines.append(f"\n⚠️ BRICK-WALL FILTER: Detected at {spec['brick_wall_freq']:.0f} Hz")
|
| 377 |
-
|
| 378 |
issues_text = "\n".join(issue_lines)
|
| 379 |
-
|
| 380 |
-
|
| 381 |
-
|
| 382 |
-
|
| 383 |
-
|
| 384 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 385 |
ax_score = fig.add_subplot(gs[3, 3])
|
| 386 |
-
ax_score.axis(
|
| 387 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 388 |
score = 100
|
| 389 |
-
critical = sum(1 for _, sev, _ in issues if sev == 'CRITICAL')
|
| 390 |
-
high = sum(1 for _, sev, _ in issues if sev == 'HIGH')
|
| 391 |
-
medium = sum(1 for _, sev, _ in issues if sev == 'MEDIUM')
|
| 392 |
-
|
| 393 |
score -= critical * 30
|
| 394 |
score -= high * 15
|
| 395 |
score -= medium * 5
|
| 396 |
score = max(0, score)
|
| 397 |
-
|
|
|
|
| 398 |
if score >= 90:
|
| 399 |
-
grade,
|
| 400 |
elif score >= 75:
|
| 401 |
-
grade,
|
| 402 |
elif score >= 60:
|
| 403 |
-
grade,
|
| 404 |
elif score >= 40:
|
| 405 |
-
grade,
|
| 406 |
else:
|
| 407 |
-
grade,
|
| 408 |
-
|
| 409 |
score_lines = [
|
| 410 |
"QUALITY ASSESSMENT",
|
| 411 |
"β" * 28,
|
|
@@ -421,55 +554,76 @@ def create_report(audio_data, output_path):
|
|
| 421 |
f"🟡 Medium: {medium}",
|
| 422 |
"",
|
| 423 |
"β" * 28,
|
| 424 |
-
|
| 425 |
f"{audio_data['timestamp']}"
|
| 426 |
]
|
| 427 |
-
|
| 428 |
score_text = "\n".join(score_lines)
|
| 429 |
-
ax_score.text(0.5, 0.5, score_text, transform=ax_score.transAxes,
|
| 430 |
-
fontsize=11, ha='center', va='center', family='monospace',
|
| 431 |
-
bbox=dict(boxstyle='round,pad=1.2', facecolor=color,
|
| 432 |
-
edgecolor='black', linewidth=3, alpha=0.7),
|
| 433 |
-
fontweight='bold')
|
| 434 |
-
|
| 435 |
-
plt.savefig(output_path, dpi=300, bbox_inches='tight',
|
| 436 |
-
facecolor='white', edgecolor='none')
|
| 437 |
-
plt.close()
|
| 438 |
-
|
| 439 |
-
return output_path
|
| 440 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 441 |
|
| 442 |
-
# ====================
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 443 |
|
| 444 |
def analyze_audio(audio_file, progress=gr.Progress()):
|
| 445 |
-
"""Analyze uploaded audio file"""
|
| 446 |
if audio_file is None:
|
| 447 |
return None, "⚠️ Please upload an audio file to analyze."
|
| 448 |
-
|
| 449 |
try:
|
| 450 |
progress(0.1, desc="Reading audio file...")
|
| 451 |
-
|
| 452 |
output_dir = Path("reports")
|
| 453 |
output_dir.mkdir(exist_ok=True)
|
| 454 |
-
|
| 455 |
path = Path(audio_file)
|
| 456 |
-
|
| 457 |
progress(0.2, desc="Loading audio data...")
|
| 458 |
info = read_audio_info(str(path))
|
| 459 |
y, sr = librosa.load(str(path), sr=None, mono=True)
|
| 460 |
-
|
| 461 |
progress(0.4, desc="Analyzing time-domain...")
|
| 462 |
time_stats = compute_time_domain_stats(y)
|
| 463 |
-
|
| 464 |
progress(0.6, desc="Performing spectral analysis...")
|
| 465 |
spectral = compute_spectral_analysis(y, sr)
|
| 466 |
-
|
| 467 |
progress(0.7, desc="Computing loudness...")
|
| 468 |
lufs = compute_loudness(y, sr) if LOUDNESS_AVAILABLE else None
|
| 469 |
-
|
| 470 |
progress(0.8, desc="Detecting audio issues...")
|
| 471 |
issues = detect_audio_issues(spectral, time_stats)
|
| 472 |
-
|
| 473 |
audio_data = {
|
| 474 |
"filename": path.name,
|
| 475 |
"info": info,
|
|
@@ -479,24 +633,27 @@ def analyze_audio(audio_file, progress=gr.Progress()):
|
|
| 479 |
"issues": issues,
|
| 480 |
"timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
| 481 |
}
|
| 482 |
-
|
| 483 |
progress(0.9, desc="Generating report...")
|
| 484 |
-
|
| 485 |
output_filename = path.stem + "_report.png"
|
| 486 |
output_path = output_dir / output_filename
|
| 487 |
-
|
| 488 |
create_report(audio_data, str(output_path))
|
| 489 |
-
|
| 490 |
progress(1.0, desc="Complete!")
|
| 491 |
-
|
| 492 |
-
#
|
| 493 |
-
|
| 494 |
-
|
| 495 |
-
|
| 496 |
-
|
|
|
|
|
|
|
|
|
|
| 497 |
score = 100 - (critical * 30) - (high * 15) - (medium * 5)
|
| 498 |
score = max(0, score)
|
| 499 |
-
|
| 500 |
if score >= 90:
|
| 501 |
grade, quality, color = "A", "EXCELLENT", "🟢"
|
| 502 |
elif score >= 75:
|
|
@@ -507,102 +664,117 @@ def analyze_audio(audio_file, progress=gr.Progress()):
|
|
| 507 |
grade, quality, color = "D", "POOR", "🟠"
|
| 508 |
else:
|
| 509 |
grade, quality, color = "F", "CRITICAL", "🔴"
|
| 510 |
-
|
| 511 |
-
energy = spectral['energy_distribution']
|
| 512 |
-
|
| 513 |
-
summary = f"""
|
| 514 |
-
# π΅ Analysis Complete! β
|
| 515 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 516 |
## File Information
|
| 517 |
- **Filename:** `{audio_data['filename']}`
|
| 518 |
-
- **Duration:** {info['duration']:.2f}
|
| 519 |
-
- **Sample Rate:** {info['samplerate']:,} Hz
|
| 520 |
-
- **Channels:** {info['channels']}
|
| 521 |
- **Format:** {info['format']} ({info['subtype']})
|
| 522 |
|
| 523 |
---
|
| 524 |
|
| 525 |
-
## Quality Assessment
|
| 526 |
-
|
| 527 |
-
### Overall Score: **{score}/100** - Grade **{grade}** {color}
|
| 528 |
**Quality Rating:** {quality}
|
| 529 |
|
| 530 |
### Audio Metrics
|
| 531 |
| Metric | Value |
|
| 532 |
-
|--------|-------|
|
| 533 |
| Peak Level | {time_stats['peak_db']:.2f} dBFS |
|
| 534 |
| RMS Level | {time_stats['rms_db']:.2f} dBFS |
|
| 535 |
| Crest Factor | {time_stats['crest_factor_db']:.2f} dB |
|
| 536 |
| SNR (Est.) | {time_stats['snr_db']:.1f} dB |
|
| 537 |
"""
|
| 538 |
-
|
| 539 |
if lufs is not None:
|
| 540 |
summary += f"| Integrated LUFS | {lufs:.2f} LUFS |\n"
|
| 541 |
-
|
| 542 |
summary += f"""
|
| 543 |
---
|
| 544 |
|
| 545 |
-
## Spectral Analysis
|
| 546 |
| Parameter | Value |
|
| 547 |
-
|-----------|-------|
|
| 548 |
| Spectral Centroid | {spectral['spectral_centroid']:.1f} Hz |
|
| 549 |
| 85% Rolloff | {spectral['rolloff_85pct']:.1f} Hz |
|
| 550 |
| 95% Rolloff | {spectral['rolloff_95pct']:.1f} Hz |
|
| 551 |
-
| Highest Freq (
|
| 552 |
|
| 553 |
-
### Energy Distribution
|
| 554 |
-
|
| 555 |
-
- **100
|
| 556 |
-
- **500
|
| 557 |
-
- **2k
|
| 558 |
-
- **
|
| 559 |
-
- **
|
|
|
|
|
|
|
| 560 |
|
| 561 |
---
|
| 562 |
|
| 563 |
## Issues Detected: **{len(issues)}**
|
| 564 |
"""
|
| 565 |
-
|
| 566 |
if issues:
|
| 567 |
summary += "\n### ⚠️ Detected Issues:\n\n"
|
| 568 |
-
|
| 569 |
-
|
| 570 |
-
for issue_type,
|
| 571 |
-
|
| 572 |
-
summary += f"{icon} **[{severity}] {issue_type}**\n"
|
| 573 |
summary += f" - {desc}\n\n"
|
| 574 |
else:
|
| 575 |
-
summary += "\n### ✅ No significant issues detected
|
| 576 |
-
|
| 577 |
-
if spectral[
|
| 578 |
-
summary += f"\n### π΅ Spectral Notches: {len(spectral['spectral_notches'])}\n
|
| 579 |
-
for i,
|
| 580 |
-
summary += f"{i}. **{
|
| 581 |
-
|
| 582 |
-
summary += f"
|
| 583 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 584 |
return str(output_path), summary
|
| 585 |
-
|
| 586 |
except Exception as e:
|
| 587 |
import traceback
|
| 588 |
traceback.print_exc()
|
| 589 |
return None, f"# ❌ Analysis Failed\n\n**Error:** {str(e)}"
|
| 590 |
-
|
| 591 |
-
|
| 592 |
-
# ====================
|
| 593 |
|
| 594 |
with gr.Blocks(title="Audio Forensic Analyzer") as demo:
|
| 595 |
-
|
| 596 |
gr.Markdown("""
|
| 597 |
-
# 🎵 Audio Forensic Analyzer
|
| 598 |
-
|
| 599 |
-
|
| 600 |
-
|
| 601 |
-
|
| 602 |
-
|
| 603 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 604 |
""")
|
| 605 |
-
|
| 606 |
with gr.Row():
|
| 607 |
with gr.Column(scale=1):
|
| 608 |
audio_input = gr.Audio(
|
|
@@ -610,28 +782,33 @@ with gr.Blocks(title="Audio Forensic Analyzer") as demo:
|
|
| 610 |
type="filepath",
|
| 611 |
sources=["upload"]
|
| 612 |
)
|
| 613 |
-
|
| 614 |
analyze_btn = gr.Button(
|
| 615 |
"π Analyze Audio",
|
| 616 |
variant="primary",
|
| 617 |
size="lg"
|
| 618 |
)
|
| 619 |
-
|
| 620 |
with gr.Column(scale=2):
|
| 621 |
report_output = gr.Image(
|
| 622 |
label="π Analysis Report",
|
| 623 |
type="filepath",
|
| 624 |
height=600
|
| 625 |
)
|
| 626 |
-
|
| 627 |
with gr.Row():
|
| 628 |
summary_output = gr.Markdown(label="π Analysis Summary")
|
| 629 |
-
|
| 630 |
analyze_btn.click(
|
| 631 |
fn=analyze_audio,
|
| 632 |
inputs=[audio_input],
|
| 633 |
outputs=[report_output, summary_output]
|
| 634 |
)
|
| 635 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 636 |
if __name__ == "__main__":
|
| 637 |
-
demo.launch()
|
|
|
|
| 1 |
+
# ============================================================
|
| 2 |
+
# app.py (Updated Full Version — Chunk 1: Lines 1–300)
|
| 3 |
+
# ============================================================
|
| 4 |
+
|
| 5 |
import gradio as gr
|
| 6 |
import sys
|
| 7 |
from pathlib import Path
|
|
|
|
| 27 |
# ==================== ANALYSIS FUNCTIONS ====================
|
| 28 |
|
| 29 |
def read_audio_info(path):
|
| 30 |
+
"""Read audio file metadata"""
|
| 31 |
info = sf.info(path)
|
| 32 |
return {
|
| 33 |
"samplerate": int(info.samplerate),
|
|
|
|
| 40 |
|
| 41 |
|
| 42 |
def compute_time_domain_stats(y):
|
| 43 |
+
"""Calculate time-domain statistics"""
|
| 44 |
peak = float(np.max(np.abs(y)))
|
| 45 |
+
rms = float(np.sqrt(np.mean(y ** 2)))
|
| 46 |
+
|
| 47 |
peak_db = 20 * np.log10(max(peak, 1e-12))
|
| 48 |
rms_db = 20 * np.log10(max(rms, 1e-12))
|
| 49 |
crest_factor = peak_db - rms_db
|
| 50 |
+
|
| 51 |
abs_y = np.abs(y)
|
| 52 |
noise_floor = float(np.percentile(abs_y, 10))
|
| 53 |
snr_est = 20 * np.log10(max(rms, 1e-12) / max(noise_floor, 1e-12))
|
| 54 |
zcr = float(np.mean(librosa.feature.zero_crossing_rate(y)))
|
| 55 |
+
|
| 56 |
return {
|
| 57 |
"peak": peak,
|
| 58 |
"rms": rms,
|
|
|
|
| 65 |
}
|
| 66 |
|
| 67 |
|
| 68 |
+
# ============================================================
|
| 69 |
+
# UPDATED SPECTRAL ANALYSIS FUNCTION (FFT=4096, 90th percentile)
|
| 70 |
+
# ============================================================
|
| 71 |
+
|
| 72 |
+
def compute_spectral_analysis(y, sr, n_fft=4096):
|
| 73 |
+
"""Comprehensive spectral analysis tuned for speech QC"""
|
| 74 |
+
|
| 75 |
hop_length = n_fft // 4
|
| 76 |
|
| 77 |
# STFT
|
| 78 |
+
S = np.abs(librosa.stft(y, n_fft=n_fft, hop_length=hop_length, window="hann"))
|
| 79 |
+
freqs = np.linspace(0, sr / 2, S.shape[0])
|
| 80 |
|
| 81 |
+
# Convert amplitude to dB
|
| 82 |
S_db = librosa.amplitude_to_db(S, ref=np.max)
|
| 83 |
|
| 84 |
+
# ===== UPDATED ENERGY ESTIMATE: 90th percentile of power =====
|
| 85 |
+
S_power = S ** 2
|
| 86 |
+
energy = np.percentile(S_power, 90, axis=1) + 1e-20
|
| 87 |
total_energy = float(np.sum(energy))
|
|
|
|
| 88 |
cum_energy = np.cumsum(energy)
|
| 89 |
+
|
| 90 |
+
# Rolloffs
|
| 91 |
roll85_idx = np.searchsorted(cum_energy, 0.85 * total_energy)
|
| 92 |
roll95_idx = np.searchsorted(cum_energy, 0.95 * total_energy)
|
|
|
|
|
|
|
| 93 |
|
| 94 |
+
freq_at_85 = float(freqs[min(roll85_idx, len(freqs) - 1)])
|
| 95 |
+
freq_at_95 = float(freqs[min(roll95_idx, len(freqs) - 1)])
|
| 96 |
+
|
| 97 |
+
# ===== UPDATED HF ENVELOPE: 90th percentile of dB =====
|
| 98 |
mean_db_per_bin = np.percentile(S_db, 90, axis=1)
|
| 99 |
|
| 100 |
peak_db = float(np.max(S_db))
|
| 101 |
+
threshold_db = peak_db - 60
|
| 102 |
+
|
| 103 |
non_silent_bins = np.where(mean_db_per_bin > threshold_db)[0]
|
| 104 |
highest_freq = float(freqs[non_silent_bins[-1]]) if non_silent_bins.size else 0.0
|
| 105 |
|
| 106 |
+
# ===================== UPDATED SPEECH-CENTRIC BANDS =====================
|
| 107 |
+
def band_energy(low, high):
|
| 108 |
+
i1 = np.searchsorted(freqs, low)
|
| 109 |
+
i2 = np.searchsorted(freqs, high)
|
| 110 |
+
return float(100 * np.sum(energy[i1:i2]) / total_energy)
|
| 111 |
|
| 112 |
+
def band_energy_above(f):
|
| 113 |
idx = np.searchsorted(freqs, f)
|
| 114 |
+
return float(100 * np.sum(energy[idx:]) / total_energy)
|
| 115 |
|
| 116 |
energy_stats = {
|
| 117 |
+
"below_100hz": band_energy(0, 100),
|
| 118 |
+
"100_500hz": band_energy(100, 500),
|
| 119 |
+
"500_2khz": band_energy(500, 2000),
|
| 120 |
+
"2k_8khz": band_energy(2000, 8000),
|
| 121 |
+
"8k_12khz": band_energy(8000, 12000),
|
| 122 |
+
"12k_16khz": band_energy(12000, 16000),
|
| 123 |
+
"above_16khz": band_energy_above(16000)
|
|
|
|
| 124 |
}
|
| 125 |
|
| 126 |
+
# Brickwall detection
|
| 127 |
diffs = np.diff(mean_db_per_bin)
|
| 128 |
+
big_drop_idx = np.where(diffs < -20)[0]
|
| 129 |
brick_wall = bool(big_drop_idx.size)
|
| 130 |
brick_freq = float(freqs[big_drop_idx[0]]) if big_drop_idx.size else None
|
| 131 |
|
| 132 |
+
# Spectral notches
|
| 133 |
smooth = sps.medfilt(mean_db_per_bin, kernel_size=9)
|
| 134 |
minima = sps.argrelextrema(smooth, np.less)[0]
|
| 135 |
notches = []
|
| 136 |
+
|
| 137 |
for m in minima:
|
| 138 |
+
left = smooth[max(0, m - 6):m]
|
| 139 |
+
right = smooth[m + 1:min(len(smooth), m + 7)]
|
| 140 |
+
neighbor_peak = max(
|
| 141 |
left.max() if left.size else -999,
|
| 142 |
right.max() if right.size else -999
|
| 143 |
)
|
| 144 |
+
depth = neighbor_peak - smooth[m]
|
| 145 |
+
if depth >= 15 and freqs[m] > 100:
|
| 146 |
+
notches.append({
|
| 147 |
+
"freq": float(freqs[m]),
|
| 148 |
+
"depth_db": float(depth)
|
| 149 |
+
})
|
| 150 |
+
|
| 151 |
+
# Additional spectral stats
|
| 152 |
centroid = float(np.mean(librosa.feature.spectral_centroid(S=S, sr=sr)))
|
| 153 |
bandwidth = float(np.mean(librosa.feature.spectral_bandwidth(S=S, sr=sr)))
|
| 154 |
flatness = float(np.mean(librosa.feature.spectral_flatness(S=S)))
|
|
|
|
| 169 |
"spectral_centroid": centroid,
|
| 170 |
"spectral_bandwidth": bandwidth,
|
| 171 |
"spectral_flatness": flatness,
|
| 172 |
+
"spectral_rolloff": rolloff
|
| 173 |
}
|
| 174 |
+
# ============================================================
|
| 175 |
+
# UPDATED ISSUE DETECTION (HF thresholds corrected)
|
| 176 |
+
# ============================================================
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 177 |
|
| 178 |
def detect_audio_issues(spectral, time_stats):
|
| 179 |
+
"""Detect common audio processing artifacts"""
|
| 180 |
issues = []
|
| 181 |
energy = spectral["energy_distribution"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 182 |
|
| 183 |
+
# High-pass detection
|
| 184 |
+
if energy["below_100hz"] < 0.5:
|
| 185 |
+
issues.append(("HIGH_PASS_FILTER", "HIGH",
|
| 186 |
+
f"Very low energy <100Hz ({energy['below_100hz']:.2f}%). Possible HPF."))
|
| 187 |
|
| 188 |
+
# Updated HF-loss rules (speech-appropriate)
|
| 189 |
+
if energy["8k_12khz"] < 0.05 and spectral["highest_freq_minus60db"] < 8000:
|
| 190 |
+
issues.append(("HF_LOSS", "HIGH",
|
| 191 |
+
f"Severe HF loss. Only {energy['8k_12khz']:.3f}% in 8–12kHz."))
|
| 192 |
+
|
| 193 |
+
elif energy["8k_12khz"] < 0.3:
|
| 194 |
+
issues.append(("HF_LOSS", "MEDIUM",
|
| 195 |
+
f"Reduced HF content ({energy['8k_12khz']:.3f}% in 8–12kHz)."))
|
| 196 |
|
| 197 |
+
# Brickwall filter
|
| 198 |
+
if spectral["brick_wall_detected"]:
|
| 199 |
+
issues.append(("BRICK_WALL", "HIGH",
|
| 200 |
+
f"Possible brick-wall at {spectral['brick_wall_freq']:.0f} Hz"))
|
|
|
|
|
|
|
| 201 |
|
| 202 |
+
# Spectral notches
|
| 203 |
+
if len(spectral["spectral_notches"]) > 0:
|
| 204 |
+
issues.append(("SPECTRAL_NOTCHES", "MEDIUM",
|
| 205 |
+
f"{len(spectral['spectral_notches'])} spectral notches detected."))
|
| 206 |
+
|
| 207 |
+
# Compression
|
| 208 |
+
if time_stats["crest_factor_db"] < 3:
|
| 209 |
+
issues.append(("OVER_COMPRESSION", "HIGH",
|
| 210 |
+
f"Very low crest factor {time_stats['crest_factor_db']:.1f} dB"))
|
| 211 |
+
elif time_stats["crest_factor_db"] < 6:
|
| 212 |
+
issues.append(("COMPRESSION", "MEDIUM",
|
| 213 |
+
f"Low crest factor {time_stats['crest_factor_db']:.1f} dB"))
|
| 214 |
+
|
| 215 |
+
# Clipping
|
| 216 |
if time_stats["peak"] >= 0.999:
|
| 217 |
+
issues.append(("CLIPPING", "CRITICAL",
|
| 218 |
+
f"Peak amplitude {time_stats['peak']:.6f}. Possible clipping."))
|
| 219 |
|
| 220 |
return issues
|
| 221 |
|
| 222 |
+
|
| 223 |
+
# ============================================================
|
| 224 |
+
# REPORT GENERATION
|
| 225 |
+
# ============================================================
|
| 226 |
+
|
| 227 |
def _report_text_panel(ax, lines, facecolor, edgecolor):
    """Draw *lines* as a boxed monospace text block anchored top-left in *ax*."""
    ax.axis("off")
    ax.text(
        0.05, 0.95, "\n".join(lines),
        transform=ax.transAxes,
        fontsize=11,
        verticalalignment="top",
        family="monospace",
        bbox=dict(
            boxstyle="round,pad=1",
            facecolor=facecolor,
            edgecolor=edgecolor,
            linewidth=2,
        ),
    )


def _report_grade(score):
    """Map a 0-100 score to ``(grade, quality, badge_color)``."""
    ladder = (
        (90, "A", "EXCELLENT", "#00C853"),
        (75, "B", "GOOD", "#64DD17"),
        (60, "C", "FAIR", "#FFD600"),
        (40, "D", "POOR", "#FF6D00"),
    )
    for threshold, grade, quality, color in ladder:
        if score >= threshold:
            return grade, quality, color
    return "F", "CRITICAL", "#D50000"


def _report_issue_lines(issues, spec):
    """Build the text lines shown in the issues panel of the report."""
    lines = ["DETECTED ISSUES & WARNINGS", "─" * 80]

    if not issues:
        lines.append("✅ No significant issues detected.")
    else:
        # Every severity produced by the detector needs an entry here,
        # otherwise it is rendered with the generic INFO marker.
        severity_icons = {
            "CRITICAL": "🔴 CRITICAL",
            "HIGH": "🟠 HIGH",
            "MEDIUM": "🟡 MEDIUM",
            "LOW": "🟢 LOW",
        }
        for issue_type, severity, description in issues:
            icon = severity_icons.get(severity, "⚪ INFO")
            lines.append(f"\n{icon} — {issue_type}")
            lines.append(f" → {description}")

    # Itemise at most five notches so the panel does not overflow.
    notches = spec["spectral_notches"]
    if notches:
        lines.append(f"\n🔵 SPECTRAL NOTCHES DETECTED: {len(notches)}")
        for i, notch in enumerate(notches[:5], start=1):
            lines.append(
                f" {i}. Frequency: {notch['freq']:.1f} Hz, Depth: {notch['depth_db']:.1f} dB"
            )
        if len(notches) > 5:
            lines.append(f" ... and {len(notches) - 5} more")

    if spec["brick_wall_detected"]:
        lines.append(f"\n⚠️ BRICK-WALL FILTER: Detected at {spec['brick_wall_freq']:.0f} Hz")

    return lines


def create_report(audio_data, output_path):
    """Render the analysis results as a single-page PNG report.

    The page is a 4x4 grid: a full-width spectrogram, two text panels
    (file/time-domain information and spectral statistics), a full-width bar
    chart of per-band energy percentages, and a bottom row with the issues
    list and the quality-score badge.

    Args:
        audio_data: Mapping with the keys ``filename``, ``info``,
            ``time_stats``, ``spectral``, ``issues`` (sequence of
            ``(issue_type, severity, description)`` tuples), ``timestamp``
            and, optionally, ``lufs``.
        output_path: Destination path of the PNG file.

    Returns:
        ``output_path``, unchanged.
    """
    info = audio_data["info"]
    time_stats = audio_data["time_stats"]
    spec = audio_data["spectral"]
    energy = spec["energy_distribution"]
    issues = audio_data["issues"]

    rule = "─" * 50

    plt.style.use("default")
    fig = plt.figure(figsize=(22, 16))
    try:
        fig.patch.set_facecolor("white")
        fig.suptitle(
            f"AUDIO FORENSIC ANALYSIS REPORT\n{audio_data['filename']}",
            fontsize=20,
            fontweight="bold",
            y=0.97,
        )

        gs = gridspec.GridSpec(
            4, 4,
            figure=fig,
            hspace=0.4,
            wspace=0.4,
            height_ratios=[1.5, 1, 0.8, 0.9],
            left=0.05,
            right=0.95,
            top=0.92,
            bottom=0.05,
        )

        # ---------------- Spectrogram ----------------
        ax_spec = fig.add_subplot(gs[0, :])
        img = librosa.display.specshow(
            spec["S_db"],
            sr=info["samplerate"],
            hop_length=spec["hop_length"],
            y_axis="hz",
            x_axis="time",
            cmap="viridis",
            ax=ax_spec,
            vmin=-80,
            vmax=0,
        )
        ax_spec.set_title("Spectrogram", fontsize=14, fontweight="bold", pad=10)
        ax_spec.set_ylabel("Frequency (Hz)", fontsize=11, fontweight="bold")
        ax_spec.set_xlabel("Time (seconds)", fontsize=11, fontweight="bold")
        ax_spec.grid(True, alpha=0.3, linestyle="--", linewidth=0.5)

        cbar = plt.colorbar(img, ax=ax_spec, format="%+2.0f dB", pad=0.01)
        cbar.ax.tick_params(labelsize=10)
        cbar.set_label("Magnitude (dB)", fontsize=10, fontweight="bold")

        # ---------------- File / time-domain info ----------------
        info_lines = [
            "FILE INFORMATION",
            rule,
            f"Sample Rate: {info['samplerate']:,} Hz",
            f"Channels: {info['channels']}",
            f"Duration: {info['duration']:.2f} sec",
            f"Format: {info['format']} ({info['subtype']})",
            f"Total Frames: {info['frames']:,}",
            "",
            "TIME-DOMAIN ANALYSIS",
            rule,
            f"Peak Level: {time_stats['peak_db']:.2f} dBFS ({time_stats['peak']:.6f})",
            f"RMS Level: {time_stats['rms_db']:.2f} dBFS ({time_stats['rms']:.6f})",
            f"Crest Factor: {time_stats['crest_factor_db']:.2f} dB",
            f"Noise Floor: {time_stats['noise_floor']:.6f}",
            f"Est. SNR: {time_stats['snr_db']:.1f} dB",
            f"Zero Cross Rate: {time_stats['zero_crossing_rate']:.4f}",
        ]
        # Loudness is optional: the key is absent or None when unavailable.
        if audio_data.get("lufs") is not None:
            info_lines.extend([
                "",
                "LOUDNESS (BS.1770)",
                rule,
                f"Integrated LUFS: {audio_data['lufs']:.2f} LUFS",
            ])
        _report_text_panel(
            fig.add_subplot(gs[1, 0:2]), info_lines, "#E8F4F8", "#0077BE"
        )

        # ---------------- Spectral statistics ----------------
        spectral_lines = [
            "SPECTRAL ANALYSIS",
            rule,
            f"Centroid: {spec['spectral_centroid']:.1f} Hz",
            f"Flatness: {spec['spectral_flatness']:.4f}",
            f"Rolloff: {spec['spectral_rolloff']:.1f} Hz",
            "",
            "FREQUENCY ROLLOFF POINTS",
            rule,
            f"85% Energy: {spec['rolloff_85pct']:.1f} Hz",
            f"95% Energy: {spec['rolloff_95pct']:.1f} Hz",
            f"Highest (-60dB): {spec['highest_freq_minus60db']:.1f} Hz",
            "",
            "ENERGY DISTRIBUTION (Speech Bands)",
            rule,
            f"< 100 Hz: {energy['below_100hz']:.2f}%",
            f"100–500 Hz: {energy['100_500hz']:.2f}%",
            f"500–2k Hz: {energy['500_2khz']:.2f}%",
            f"2k–8k Hz: {energy['2k_8khz']:.2f}%",
            f"8k–12k Hz: {energy['8k_12khz']:.2f}%",
            f"12k–16k Hz: {energy['12k_16khz']:.2f}%",
            f"> 16k Hz: {energy['above_16khz']:.2f}%",
        ]
        _report_text_panel(
            fig.add_subplot(gs[1, 2:4]), spectral_lines, "#FFF4E6", "#FF8C00"
        )

        # ---------------- Energy distribution bar chart ----------------
        ax_energy = fig.add_subplot(gs[2, :])
        band_specs = [
            ("<100Hz", "below_100hz", "#2C3E50"),
            ("100–500Hz", "100_500hz", "#E74C3C"),
            ("500–2kHz", "500_2khz", "#E67E22"),
            ("2k–8kHz", "2k_8khz", "#F39C12"),
            ("8k–12kHz", "8k_12khz", "#2ECC71"),
            ("12k–16kHz", "12k_16khz", "#3498DB"),
            (">16kHz", "above_16khz", "#9B59B6"),
        ]
        bands = [label for label, _, _ in band_specs]
        values = [energy[key] for _, key, _ in band_specs]
        colors = [color for _, _, color in band_specs]

        bars = ax_energy.bar(
            bands, values,
            color=colors,
            edgecolor="black",
            linewidth=1.5,
            alpha=0.85,
        )
        ax_energy.set_ylabel("Energy Percentage (%)", fontsize=12, fontweight="bold")
        ax_energy.set_title(
            "Frequency Band Energy Distribution", fontsize=13, fontweight="bold", pad=10
        )
        ax_energy.grid(axis="y", alpha=0.4, linestyle="--", linewidth=0.8)
        # Leave headroom for the value labels; fall back to a unit axis when
        # every band is empty so the y-range is never degenerate.
        tallest = max(values)
        ax_energy.set_ylim(0, tallest * 1.15 if tallest > 0 else 1)
        ax_energy.set_axisbelow(True)

        for bar, val in zip(bars, values):
            ax_energy.text(
                bar.get_x() + bar.get_width() / 2.0, bar.get_height() + 0.5,
                f"{val:.2f}%",
                ha="center",
                va="bottom",
                fontsize=10,
                fontweight="bold",
            )

        # ---------------- Issues panel ----------------
        # NOTE(review): the emoji markers only render if the resolved
        # monospace font provides those glyphs — confirm on the deploy image.
        _report_text_panel(
            fig.add_subplot(gs[3, 0:3]),
            _report_issue_lines(issues, spec),
            "#FFE6E6",
            "#DC143C",
        )

        # ---------------- Quality score badge ----------------
        ax_score = fig.add_subplot(gs[3, 3])
        ax_score.axis("off")

        critical = sum(1 for _, sev, _ in issues if sev == "CRITICAL")
        high = sum(1 for _, sev, _ in issues if sev == "HIGH")
        medium = sum(1 for _, sev, _ in issues if sev == "MEDIUM")

        # Penalty weights: 30 per critical, 15 per high, 5 per medium issue.
        score = max(0, 100 - critical * 30 - high * 15 - medium * 5)
        grade, quality, color = _report_grade(score)

        score_lines = [
            "QUALITY ASSESSMENT",
            "─" * 28,
            "",
            f"Score: {score}/100",
            f"Grade: {grade}",
            quality,
            "",
            "─" * 28,
            f"🔴 Critical: {critical}",
            f"🟠 High: {high}",
            f"🟡 Medium: {medium}",
            "",
            "─" * 28,
            "Generated:",
            f"{audio_data['timestamp']}",
        ]
        ax_score.text(
            0.5, 0.5, "\n".join(score_lines),
            transform=ax_score.transAxes,
            fontsize=11,
            ha="center",
            va="center",
            family="monospace",
            bbox=dict(
                boxstyle="round,pad=1.2",
                facecolor=color,
                edgecolor="black",
                linewidth=3,
                alpha=0.75,
            ),
            fontweight="bold",
        )

        # ---------------- Save ----------------
        fig.savefig(
            output_path,
            dpi=300,
            bbox_inches="tight",
            facecolor="white",
            edgecolor="none",
        )
    finally:
        # Close this specific figure even when rendering fails, so a
        # long-running server process does not accumulate open figures.
        plt.close(fig)

    return output_path
|
| 594 |
+
# ============================================================
|
| 595 |
+
# MAIN ANALYSIS FUNCTION (GRADIO CALLBACK)
|
| 596 |
+
# ============================================================
|
| 597 |
|
| 598 |
def analyze_audio(audio_file, progress=gr.Progress()):
    """Analyze an uploaded audio file (Gradio click callback).

    Runs the time-domain, spectral, loudness and issue-detection steps,
    renders the PNG report into the ``reports`` directory and builds a
    Markdown summary of the results.

    Args:
        audio_file: Filesystem path of the uploaded audio, or ``None`` when
            nothing was uploaded.
        progress: Gradio progress tracker, called with a fraction and a
            description before each step.

    Returns:
        A ``(report_path, summary_markdown)`` tuple. ``report_path`` is
        ``None`` when no file was supplied or the analysis failed; in that
        case the Markdown carries the explanation.
    """
    if audio_file is None:
        return None, "⚠️ Please upload an audio file to analyze."

    try:
        progress(0.1, desc="Reading audio file...")
        output_dir = Path("reports")
        output_dir.mkdir(exist_ok=True)
        path = Path(audio_file)

        progress(0.2, desc="Loading audio data...")
        info = read_audio_info(str(path))
        # sr=None keeps the file's native sample rate; mono=True downmixes.
        y, sr = librosa.load(str(path), sr=None, mono=True)

        progress(0.4, desc="Analyzing time-domain...")
        time_stats = compute_time_domain_stats(y)

        progress(0.6, desc="Performing spectral analysis...")
        spectral = compute_spectral_analysis(y, sr)

        progress(0.7, desc="Computing loudness...")
        lufs = compute_loudness(y, sr) if LOUDNESS_AVAILABLE else None

        progress(0.8, desc="Detecting audio issues...")
        issues = detect_audio_issues(spectral, time_stats)

        # create_report reads every one of these keys, so all of them must
        # be present ("lufs" may be None).
        audio_data = {
            "filename": path.name,
            "info": info,
            "time_stats": time_stats,
            "spectral": spectral,
            "lufs": lufs,
            "issues": issues,
            "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        }

        progress(0.9, desc="Generating report...")
        output_filename = f"{path.stem}_report.png"
        output_path = output_dir / output_filename
        create_report(audio_data, str(output_path))

        progress(1.0, desc="Complete!")

        # Same penalty scheme as the PNG report: 30 / 15 / 5 points per
        # critical / high / medium issue, floored at zero.
        critical = sum(1 for _, sev, _ in issues if sev == "CRITICAL")
        high = sum(1 for _, sev, _ in issues if sev == "HIGH")
        medium = sum(1 for _, sev, _ in issues if sev == "MEDIUM")
        score = max(0, 100 - (critical * 30) - (high * 15) - (medium * 5))

        if score >= 90:
            grade, quality, color = "A", "EXCELLENT", "🟢"
        elif score >= 75:
            grade, quality, color = "B", "GOOD", "🟢"
        elif score >= 60:
            grade, quality, color = "C", "FAIR", "🟡"
        elif score >= 40:
            grade, quality, color = "D", "POOR", "🟠"
        else:
            grade, quality, color = "F", "CRITICAL", "🔴"

        energy = spectral["energy_distribution"]

        # The Markdown is assembled line by line (instead of indented
        # triple-quoted blocks) so source indentation can never leak into
        # the output and turn paragraphs into code blocks.
        md = [
            "",
            "# 🎵 Analysis Complete!",
            "## File Information",
            f"- **Filename:** `{audio_data['filename']}`",
            f"- **Duration:** {info['duration']:.2f} sec",
            f"- **Sample Rate:** {info['samplerate']:,} Hz",
            f"- **Channels:** {info['channels']}",
            f"- **Format:** {info['format']} ({info['subtype']})",
            "",
            "---",
            "",
            "## Quality Assessment",
            f"### Overall Score: **{score}/100** — Grade **{grade}** {color}",
            "",
            f"**Quality Rating:** {quality}",
            "",
            "### Audio Metrics",
            "| Metric | Value |",
            "|--------|--------|",
            f"| Peak Level | {time_stats['peak_db']:.2f} dBFS |",
            f"| RMS Level | {time_stats['rms_db']:.2f} dBFS |",
            f"| Crest Factor | {time_stats['crest_factor_db']:.2f} dB |",
            f"| SNR (Est.) | {time_stats['snr_db']:.1f} dB |",
        ]
        if lufs is not None:
            md.append(f"| Integrated LUFS | {lufs:.2f} LUFS |")

        md += [
            "",
            "---",
            "",
            "## Spectral Analysis",
            "| Parameter | Value |",
            "|-----------|--------|",
            f"| Spectral Centroid | {spectral['spectral_centroid']:.1f} Hz |",
            f"| 85% Rolloff | {spectral['rolloff_85pct']:.1f} Hz |",
            f"| 95% Rolloff | {spectral['rolloff_95pct']:.1f} Hz |",
            f"| Highest Freq (−60 dB) | {spectral['highest_freq_minus60db']:.1f} Hz |",
            "",
            "### Energy Distribution (Speech Bands)",
            "",
            f"- **<100 Hz:** {energy['below_100hz']:.2f}%",
            f"- **100–500 Hz:** {energy['100_500hz']:.2f}%",
            f"- **500–2k Hz:** {energy['500_2khz']:.2f}%",
            f"- **2k–8k Hz:** {energy['2k_8khz']:.2f}%",
            f"- **8k–12k Hz:** {energy['8k_12khz']:.2f}%",
            f"- **12k–16k Hz:** {energy['12k_16khz']:.2f}%",
            f"- **>16k Hz:** {energy['above_16khz']:.2f}%",
            "",
            "---",
            "",
            f"## Issues Detected: **{len(issues)}**",
        ]

        if issues:
            icons = {"CRITICAL": "🔴", "HIGH": "🟠", "MEDIUM": "🟡", "LOW": "🟢"}
            md += ["", "### ⚠️ Detected Issues:", ""]
            for issue_type, sev, desc in issues:
                md += [
                    f"{icons.get(sev, '⚪')} **[{sev}] {issue_type}**",
                    f" - {desc}",
                    "",
                ]
        else:
            md += ["", "### ✅ No significant issues detected."]

        # Only the first five notches are itemised to keep the summary short.
        notches = spectral["spectral_notches"]
        if notches:
            md += ["", f"### 🔵 Spectral Notches: {len(notches)}"]
            md += [
                f"{i}. **{n['freq']:.1f} Hz** (Depth: {n['depth_db']:.1f} dB)"
                for i, n in enumerate(notches[:5], 1)
            ]

        md += [
            "",
            "---",
            "",
            f"📁 **Report File:** `{output_filename}`",
            "",
            f"🕐 **Generated:** {audio_data['timestamp']}",
            "",
        ]

        return str(output_path), "\n".join(md)

    except Exception as e:
        # Top-level UI boundary: log the traceback for the operator and show
        # the user a readable message instead of letting the callback crash.
        import traceback
        traceback.print_exc()
        return None, f"# ❌ Analysis Failed\n\n**Error:** {e}"
|
| 757 |
+
# ============================================================
|
| 758 |
+
# ============== GRADIO USER INTERFACE =====================
|
| 759 |
+
# ============================================================
|
| 760 |
|
| 761 |
# Layout: upload control and button on the left, rendered PNG report on the
# right, Markdown summary underneath. The intro text is joined from separate
# lines so it carries no source indentation into the rendered Markdown.
with gr.Blocks(title="Audio Forensic Analyzer") as demo:

    gr.Markdown("\n".join([
        "# 🎵 Audio Forensic Analyzer",
        "Upload an audio file to perform detailed forensic-level analysis.",
        "",
        "This tool evaluates:",
        "- Spectrum balance",
        "- HF rolloff & filtering",
        "- Compression",
        "- Clipping",
        "- Noise levels",
        "- Spectral anomalies (notches, brickwalls)",
        "",
        "**Supported formats:** WAV, MP3, FLAC, OGG, M4A, AAC",
    ]))

    with gr.Row():
        with gr.Column(scale=1):
            # type="filepath" hands the callback a path on disk, which is
            # what analyze_audio expects.
            audio_input = gr.Audio(
                label="Upload Audio File",
                type="filepath",
                sources=["upload"],
            )

            analyze_btn = gr.Button(
                "🔍 Analyze Audio",
                variant="primary",
                size="lg",
            )

        with gr.Column(scale=2):
            report_output = gr.Image(
                label="📊 Analysis Report",
                type="filepath",
                height=600,
            )

    with gr.Row():
        summary_output = gr.Markdown(label="📋 Analysis Summary")

    # analyze_audio returns (report_path, summary_markdown), matching the
    # order of the two output components.
    analyze_btn.click(
        fn=analyze_audio,
        inputs=[audio_input],
        outputs=[report_output, summary_output],
    )


# ============================================================
# ============== APP LAUNCH ================================
# ============================================================

# Start the server only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()
|