Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,497 +1,221 @@
|
|
| 1 |
-
# ============================================================
|
| 2 |
-
# AUDIO FORENSIC ANALYZER — FINAL VERSION WITH SYNTHETIC DETECTOR
|
| 3 |
-
# ============================================================
|
| 4 |
-
|
| 5 |
import gradio as gr
|
| 6 |
-
import
|
|
|
|
| 7 |
from pathlib import Path
|
| 8 |
from datetime import datetime
|
| 9 |
-
import warnings
|
| 10 |
-
warnings.filterwarnings('ignore')
|
| 11 |
-
|
| 12 |
-
import numpy as np
|
| 13 |
-
import soundfile as sf
|
| 14 |
-
import librosa
|
| 15 |
-
import librosa.display
|
| 16 |
-
import matplotlib.pyplot as plt
|
| 17 |
-
import matplotlib.gridspec as gridspec
|
| 18 |
-
import scipy.signal as sps
|
| 19 |
-
|
| 20 |
-
try:
|
| 21 |
-
import pyloudnorm as pyln
|
| 22 |
-
LOUDNESS_AVAILABLE = True
|
| 23 |
-
except ImportError:
|
| 24 |
-
LOUDNESS_AVAILABLE = False
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
# ============================================================
|
| 28 |
-
# READ AUDIO INFO
|
| 29 |
-
# ============================================================
|
| 30 |
-
|
| 31 |
-
def read_audio_info(path):
    """Return basic container metadata for the audio file at *path*.

    The header is queried through ``sf.info``; the sample data itself is
    not decoded here.

    Returns:
        A dict with the keys ``samplerate``, ``channels`` and ``frames``
        (converted to ``int``), ``subtype`` and ``format`` (passed through
        as reported), and ``duration`` in seconds (``frames / samplerate``,
        or ``0.0`` when the file reports no frames).
    """
    meta = sf.info(path)

    # A file with no frames has no meaningful length; skip the division.
    if meta.frames:
        seconds = float(meta.frames) / meta.samplerate
    else:
        seconds = 0.0

    summary = {
        "samplerate": int(meta.samplerate),
        "channels": int(meta.channels),
        "frames": int(meta.frames),
        "subtype": meta.subtype,
        "format": meta.format,
        "duration": seconds,
    }
    return summary
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
# ============================================================
|
| 44 |
-
# TIME-DOMAIN STATS
|
| 45 |
-
# ============================================================
|
| 46 |
-
|
| 47 |
-
def compute_time_domain_stats(y):
    """Compute level, dynamics and zero-crossing statistics for a signal.

    Args:
        y: Sample array (used with NumPy reductions and passed to
            ``librosa.feature.zero_crossing_rate``).

    Returns:
        A dict with ``peak`` and ``rms`` (linear), ``peak_db`` and
        ``rms_db`` (``20 * log10`` of the linear values), ``crest_factor_db``
        (peak dB minus RMS dB), ``noise_floor`` (10th percentile of the
        absolute samples), ``snr_db`` (RMS relative to that noise floor, in
        dB) and ``zero_crossing_rate`` (mean over frames).
    """
    # Lower clamp applied before every log10 so silence does not give -inf.
    floor = 1e-12

    magnitude = np.abs(y)
    peak = float(np.max(magnitude))
    rms = float(np.sqrt(np.mean(y ** 2)))
    noise_floor = float(np.percentile(magnitude, 10))

    peak_db = 20 * np.log10(max(peak, floor))
    rms_db = 20 * np.log10(max(rms, floor))
    snr_db = 20 * np.log10(max(rms, floor) / max(noise_floor, floor))

    zcr = float(np.mean(librosa.feature.zero_crossing_rate(y)))

    stats = {
        "peak": peak,
        "rms": rms,
        "peak_db": peak_db,
        "rms_db": rms_db,
        "crest_factor_db": peak_db - rms_db,
        "noise_floor": noise_floor,
        "snr_db": snr_db,
        "zero_crossing_rate": zcr,
    }
    return stats
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
# ============================================================
|
| 71 |
-
# SPECTRAL ANALYSIS
|
| 72 |
-
# ============================================================
|
| 73 |
-
|
| 74 |
-
def compute_spectral_analysis(y, sr, n_fft=4096):
    """Summarise the magnitude spectrum of a signal.

    An STFT (Hann window, hop of ``n_fft // 4``) is reduced to one value per
    frequency bin using the 90th percentile over time, and several
    descriptors are derived from that profile.

    Args:
        y: Sample array handed to ``librosa.stft``.
        sr: Sample rate in Hz; bin frequencies are spread linearly over
            ``0 .. sr / 2``.
        n_fft: FFT size.

    Returns:
        A dict with the dB spectrogram (``S_db``), the bin frequencies
        (``freqs``), ``hop_length``, the 85 % / 95 % cumulative-energy
        rolloff frequencies, the highest frequency whose profile lies within
        60 dB of the spectrogram peak, the per-band energy percentages,
        the brick-wall flag and frequency, the list of spectral notches, and
        the time-averaged librosa centroid / bandwidth / flatness / rolloff.
    """
    hop = n_fft // 4
    magnitude = np.abs(
        librosa.stft(y, n_fft=n_fft, hop_length=hop, window="hann")
    )
    freqs = np.linspace(0, sr / 2, magnitude.shape[0])
    S_db = librosa.amplitude_to_db(magnitude, ref=np.max)

    # Per-bin energy; the tiny offset keeps the total strictly positive so
    # the percentage divisions below can never divide by zero.
    energy = np.percentile(magnitude ** 2, 90, axis=1) + 1e-20
    total_energy = float(np.sum(energy))
    cumulative = np.cumsum(energy)

    def _rolloff(fraction):
        """Frequency below which *fraction* of the total energy lies."""
        pos = np.searchsorted(cumulative, fraction * total_energy)
        return float(freqs[min(pos, len(freqs) - 1)])

    def _band_percent(low, high=None):
        """Percent of total energy in [low, high); open-ended if high is None."""
        start = np.searchsorted(freqs, low)
        stop = None if high is None else np.searchsorted(freqs, high)
        return float(100 * np.sum(energy[start:stop]) / total_energy)

    freq85 = _rolloff(0.85)
    freq95 = _rolloff(0.95)

    # Highest bin whose 90th-percentile level is within 60 dB of the peak.
    profile_db = np.percentile(S_db, 90, axis=1)
    threshold = float(np.max(S_db)) - 60
    audible = np.flatnonzero(profile_db > threshold)
    highest_freq = float(freqs[audible[-1]]) if audible.size else 0.0

    energy_stats = {
        "below_100hz": _band_percent(0, 100),
        "100_500hz": _band_percent(100, 500),
        "500_2khz": _band_percent(500, 2000),
        "2k_8khz": _band_percent(2000, 8000),
        "8k_12khz": _band_percent(8000, 12000),
        "12k_16khz": _band_percent(12000, 16000),
        "above_16khz": _band_percent(16000),
    }

    # A drop of more than 20 dB between adjacent bins is flagged as a
    # brick wall; only the first such bin is reported.
    drops = np.flatnonzero(np.diff(profile_db) < -20)
    if drops.size:
        brick, brick_freq = True, float(freqs[drops[0]])
    else:
        brick, brick_freq = False, None

    # Notches: local minima of the median-smoothed profile that sit at least
    # 15 dB below the highest point within 6 bins on either side.
    smooth = sps.medfilt(profile_db, kernel_size=9)
    notches = []
    for m in sps.argrelextrema(smooth, np.less)[0]:
        before = smooth[max(0, m - 6):m]
        after = smooth[m + 1:m + 7]
        shoulder = max(
            before.max() if before.size else -999,
            after.max() if after.size else -999,
        )
        depth = shoulder - smooth[m]
        if depth >= 15 and freqs[m] > 100:
            notches.append({"freq": float(freqs[m]), "depth_db": float(depth)})

    centroid = float(np.mean(librosa.feature.spectral_centroid(S=magnitude, sr=sr)))
    bandwidth = float(np.mean(librosa.feature.spectral_bandwidth(S=magnitude, sr=sr)))
    flatness = float(np.mean(librosa.feature.spectral_flatness(S=magnitude)))
    rolloff = float(np.mean(librosa.feature.spectral_rolloff(S=magnitude, sr=sr)))

    return {
        "S_db": S_db,
        "freqs": freqs,
        "hop_length": hop,
        "rolloff_85pct": freq85,
        "rolloff_95pct": freq95,
        "highest_freq_minus60db": highest_freq,
        "energy_distribution": energy_stats,
        "brick_wall_detected": brick,
        "brick_wall_freq": brick_freq,
        "spectral_notches": notches,
        "spectral_centroid": centroid,
        "spectral_bandwidth": bandwidth,
        "spectral_flatness": flatness,
        "spectral_rolloff": rolloff,
    }
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
# ============================================================
|
| 157 |
-
# SYNTHETIC VOICE DETECTOR (LIGHTWEIGHT)
|
| 158 |
-
# ============================================================
|
| 159 |
-
|
| 160 |
-
def detect_synthetic_voice(y, sr, spectral):
    """Heuristically estimate whether a recording is synthetic speech.

    Four hand-weighted cues are combined and squashed through a logistic
    function: frame-to-frame MFCC cosine similarity, mean MFCC standard
    deviation, pitch jitter from a YIN f0 track (50-400 Hz), and the
    difference between the 8-12 kHz and 12-16 kHz energy percentages.

    This is a best-effort estimate: if any step fails, or the clip is too
    short to compare adjacent frames, the neutral result ``(0.0, "Human")``
    is returned instead of raising.

    Args:
        y: Sample array passed to librosa.
        sr: Sample rate in Hz.
        spectral: Mapping that provides ``"energy_distribution"`` with the
            ``"8k_12khz"`` and ``"12k_16khz"`` entries.

    Returns:
        ``(probability, label)`` where ``probability`` is a float in
        ``[0, 1]`` and ``label`` is ``"AI"`` when it exceeds 0.5, otherwise
        ``"Human"``.
    """
    fallback = (0.0, "Human")
    try:
        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40)
        if mfcc.shape[1] < 2:
            # No adjacent frame pair to compare; the similarity cue (and so
            # the whole score) would be NaN.
            return fallback
        mfcc_std = np.mean(np.std(mfcc, axis=1))

        # fmin/fmax are passed by keyword: recent librosa releases (0.10+)
        # reject them positionally, and the resulting TypeError used to be
        # swallowed below, making every input come back as "Human".
        f0 = librosa.yin(y, fmin=50, fmax=400, sr=sr)
        jitter = np.std(np.diff(f0) / (np.mean(f0) + 1e-6))

        energy = spectral["energy_distribution"]
        sym = abs(energy["8k_12khz"] - energy["12k_16khz"])

        # Cosine similarity between each MFCC frame and its successor.
        prev_frames, next_frames = mfcc[:, :-1], mfcc[:, 1:]
        dots = np.sum(prev_frames * next_frames, axis=0)
        norms = (np.linalg.norm(prev_frames, axis=0)
                 * np.linalg.norm(next_frames, axis=0))
        cos_sim = float(np.mean(dots / (norms + 1e-8)))

        score = (
            1.2 * (cos_sim - 0.85) +
            0.8 * (0.15 - mfcc_std) +
            1.0 * (0.02 - jitter) +
            0.5 * (0.10 - sym)
        )
        if not np.isfinite(score):
            return fallback

        prob = 1 / (1 + np.exp(-5 * score))
        prob = float(np.clip(prob, 0, 1))
        label = "AI" if prob > 0.5 else "Human"
        return prob, label
    except Exception:
        # Deliberately best-effort, but no longer silent and no longer
        # trapping KeyboardInterrupt / SystemExit.
        import traceback
        traceback.print_exc()
        return fallback
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
# ============================================================
|
| 193 |
-
# ISSUE DETECTION (Your original logic preserved)
|
| 194 |
-
# ============================================================
|
| 195 |
-
|
| 196 |
-
def detect_audio_issues(spectral, time_stats):
|
| 197 |
-
issues = []
|
| 198 |
-
energy = spectral["energy_distribution"]
|
| 199 |
-
freqs = spectral["freqs"]
|
| 200 |
-
flatness = spectral["spectral_flatness"]
|
| 201 |
-
notches = spectral["spectral_notches"]
|
| 202 |
-
hf_8_12 = energy["8k_12khz"]
|
| 203 |
-
highf = spectral["highest_freq_minus60db"]
|
| 204 |
-
|
| 205 |
-
if hf_8_12 < 0.01 and highf < 9000:
|
| 206 |
-
issues.append(("HF_LOSS", "HIGH", f"Severe HF cutoff"))
|
| 207 |
-
elif hf_8_12 < 0.02:
|
| 208 |
-
issues.append(("HF_LOSS", "LOW", "Low HF energy"))
|
| 209 |
-
|
| 210 |
-
if spectral["brick_wall_detected"]:
|
| 211 |
-
issues.append(("BRICK_WALL", "HIGH",
|
| 212 |
-
f"Brick-wall at {spectral['brick_wall_freq']:.0f} Hz"))
|
| 213 |
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
issues.append(("NR_SOFT", "LOW", "Mild noise reduction"))
|
| 218 |
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
elif crest < 6:
|
| 228 |
-
issues.append(("COMPRESSION", "MEDIUM",
|
| 229 |
-
f"Crest {crest:.1f} dB"))
|
| 230 |
-
|
| 231 |
-
if time_stats["peak"] >= 0.999:
|
| 232 |
-
issues.append(("CLIPPING", "CRITICAL",
|
| 233 |
-
"Probable clipping"))
|
| 234 |
-
|
| 235 |
-
return issues
|
| 236 |
-
|
| 237 |
-
|
| 238 |
-
# ============================================================
|
| 239 |
-
# REPORT GENERATION (PNG)
|
| 240 |
-
# ============================================================
|
| 241 |
-
|
| 242 |
-
def create_report(data, outpath):
    """Render the analysis results as a single PNG report.

    The figure has a spectrogram on top, two side-by-side text panels (file /
    time-domain information and spectral statistics), a panel listing the
    detected issues, and a final panel with the quality score and the
    synthetic-voice estimate.

    Args:
        data: Mapping with the keys ``filename``, ``info``, ``time_stats``,
            ``spectral``, ``lufs`` (may be ``None``), ``issues`` (iterable of
            ``(type, severity, description)`` tuples), ``synthetic_prob``,
            ``synthetic_label`` and ``timestamp``.
        outpath: Destination path of the PNG.

    Returns:
        ``outpath``, unchanged.
    """
    text_style = dict(fontsize=11, family="monospace")

    plt.style.use("default")
    fig = plt.figure(figsize=(22, 16))
    # The figure is closed in ``finally`` so it is released even when
    # rendering or saving fails; all drawing targets ``fig`` explicitly
    # instead of relying on pyplot's "current figure".
    try:
        fig.patch.set_facecolor("white")
        fig.suptitle(
            f"AUDIO FORENSIC ANALYSIS REPORT\n{data['filename']}",
            fontsize=20, fontweight="bold", y=0.97
        )

        gs = gridspec.GridSpec(
            4, 4, figure=fig,
            hspace=0.5, wspace=0.4,
            height_ratios=[1.6, 1, 1, 1]
        )

        def _text_panel(cell, lines, face, edge, centered=False):
            """Add an axis-less panel showing *lines* in a rounded box."""
            panel = fig.add_subplot(cell)
            panel.axis("off")
            box = dict(boxstyle="round", fc=face, ec=edge)
            if centered:
                panel.text(0.5, 0.5, "\n".join(lines),
                           ha="center", va="center", bbox=box, **text_style)
            else:
                panel.text(0.02, 0.98, "\n".join(lines),
                           va="top", bbox=box, **text_style)

        info = data["info"]
        t = data["time_stats"]
        sp = data["spectral"]
        issues = data["issues"]

        # Spectrogram
        ax = fig.add_subplot(gs[0, :])
        img = librosa.display.specshow(
            sp["S_db"], sr=info["samplerate"], hop_length=sp["hop_length"],
            x_axis="time", y_axis="hz",
            cmap="viridis", ax=ax, vmin=-80, vmax=0
        )
        ax.set_title("Spectrogram", fontsize=14)
        fig.colorbar(img, ax=ax)

        # File info + time-domain block
        file_lines = [
            "FILE INFORMATION",
            f"Sample Rate: {info['samplerate']}",
            f"Channels: {info['channels']}",
            f"Duration: {info['duration']:.2f} sec",
            "",
            "TIME-DOMAIN",
            f"Peak: {t['peak_db']:.2f} dBFS",
            f"RMS: {t['rms_db']:.2f} dBFS",
            f"Crest: {t['crest_factor_db']:.2f} dB",
            f"SNR: {t['snr_db']:.1f} dB",
            f"Zero-Cross: {t['zero_crossing_rate']:.4f}",
        ]
        if data["lufs"] is not None:
            file_lines.append(f"Integrated LUFS: {data['lufs']:.2f}")
        _text_panel(gs[1, 0:2], file_lines, "#E8F4F8", "#0077BE")

        # Spectral stats
        spectral_lines = [
            "SPECTRAL ANALYSIS",
            f"Centroid: {sp['spectral_centroid']:.1f}",
            f"Bandwidth: {sp['spectral_bandwidth']:.1f}",
            f"Flatness: {sp['spectral_flatness']:.4f}",
            f"Rolloff 85%: {sp['rolloff_85pct']:.1f}",
            f"Rolloff 95%: {sp['rolloff_95pct']:.1f}",
            f"Highest -60dB: {sp['highest_freq_minus60db']:.1f}",
            "",
            "ENERGY DISTRIBUTION",
            *(f"{k}: {v:.2f}%" for k, v in sp["energy_distribution"].items())
        ]
        _text_panel(gs[1, 2:4], spectral_lines, "#FFF4E6", "#FF8C00")

        # Issues
        issue_lines = ["DETECTED ISSUES", ""]
        if not issues:
            issue_lines.append("No major issues detected.")
        else:
            for typ, sev, desc in issues:
                issue_lines.append(f"[{sev}] {typ} β {desc}")
        if sp["spectral_notches"]:
            issue_lines.append("")
            issue_lines.append(f"Spectral Notches: {len(sp['spectral_notches'])}")
        _text_panel(gs[2, :], issue_lines, "#FFE6E6", "#DC143C")

        # Quality score + synthetic estimate. Each severity costs a fixed
        # number of points; the result is clamped to 0..100.
        severities = [sev for _, sev, _ in issues]
        crit = severities.count("CRITICAL")
        hi = severities.count("HIGH")
        med = severities.count("MEDIUM")
        low = severities.count("LOW")
        score = np.clip(100 - (crit * 35 + hi * 20 + med * 8 + low * 3), 0, 100)

        summary_lines = [
            "QUALITY & SYNTHETIC ANALYSIS",
            f"Score: {score:.1f}/100",
            f"Issues β C:{crit}, H:{hi}, M:{med}, L:{low}",
            "",
            "SYNTHETIC DETECTOR",
            f"Probability: {data['synthetic_prob']:.2f}",
            f"Label: {data['synthetic_label']}",
            "",
            f"Generated: {data['timestamp']}"
        ]
        _text_panel(gs[3, :], summary_lines, "#DFFFD8", "black", centered=True)

        fig.savefig(outpath, dpi=300, bbox_inches="tight")
    finally:
        plt.close(fig)
    return outpath
|
| 379 |
|
| 380 |
|
| 381 |
# ============================================================
|
| 382 |
# MAIN ANALYSIS FUNCTION
|
| 383 |
# ============================================================
|
| 384 |
|
| 385 |
-
def analyze_audio(
|
| 386 |
-
|
| 387 |
-
return None, "Please upload an audio file."
|
| 388 |
-
|
| 389 |
-
try:
|
| 390 |
-
progress(0.1)
|
| 391 |
-
p = Path(file)
|
| 392 |
-
|
| 393 |
-
info = read_audio_info(str(p))
|
| 394 |
-
y, sr = librosa.load(str(p), sr=None, mono=True)
|
| 395 |
-
|
| 396 |
-
progress(0.3)
|
| 397 |
-
tstats = compute_time_domain_stats(y)
|
| 398 |
|
| 399 |
-
|
| 400 |
-
|
| 401 |
|
| 402 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 403 |
lufs = compute_loudness(y, sr) if LOUDNESS_AVAILABLE else None
|
| 404 |
|
| 405 |
-
|
| 406 |
-
|
| 407 |
-
|
| 408 |
-
progress(0.
|
| 409 |
-
|
| 410 |
-
|
| 411 |
-
|
| 412 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 413 |
"info": info,
|
| 414 |
-
"time_stats":
|
| 415 |
-
"spectral":
|
| 416 |
"lufs": lufs,
|
| 417 |
"issues": issues,
|
| 418 |
-
"
|
| 419 |
-
"
|
| 420 |
"timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
| 421 |
}
|
| 422 |
|
| 423 |
-
|
| 424 |
-
|
| 425 |
-
outpng = outdir / f"{p.stem}_report.png"
|
| 426 |
|
| 427 |
-
|
| 428 |
-
|
|
|
|
| 429 |
|
| 430 |
-
progress(1.0)
|
| 431 |
|
| 432 |
-
|
| 433 |
-
#
|
| 434 |
-
## File: `{p.name}`
|
| 435 |
|
| 436 |
-
##
|
| 437 |
-
-
|
| 438 |
-
-
|
|
|
|
|
|
|
| 439 |
|
| 440 |
---
|
| 441 |
|
| 442 |
-
##
|
| 443 |
-
-
|
| 444 |
-
-
|
| 445 |
-
-
|
| 446 |
-
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 447 |
|
|
|
|
| 448 |
---
|
| 449 |
|
| 450 |
-
##
|
| 451 |
-
-
|
| 452 |
-
-
|
| 453 |
-
- Highest -60 dB: {spec['highest_freq_minus60db']:.1f} Hz
|
| 454 |
|
| 455 |
---
|
| 456 |
|
| 457 |
-
##
|
| 458 |
"""
|
| 459 |
|
| 460 |
-
|
| 461 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 462 |
|
| 463 |
-
|
| 464 |
|
| 465 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 466 |
|
| 467 |
except Exception as e:
|
| 468 |
import traceback
|
| 469 |
traceback.print_exc()
|
| 470 |
-
return None, f"
|
| 471 |
|
| 472 |
|
| 473 |
# ============================================================
|
| 474 |
-
# UI
|
| 475 |
# ============================================================
|
| 476 |
|
| 477 |
-
with gr.Blocks(title="Audio Forensic Analyzer") as demo:
|
|
|
|
| 478 |
gr.Markdown("""
|
| 479 |
-
|
| 480 |
-
|
| 481 |
-
|
| 482 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 483 |
|
| 484 |
with gr.Row():
|
| 485 |
with gr.Column(scale=1):
|
| 486 |
-
|
| 487 |
-
|
| 488 |
-
|
| 489 |
-
|
|
|
|
| 490 |
|
| 491 |
-
|
| 492 |
|
| 493 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 494 |
|
| 495 |
|
|
|
|
| 496 |
if __name__ == "__main__":
|
| 497 |
-
demo.launch()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
+
import librosa
|
| 3 |
+
import soundfile as sf
|
| 4 |
from pathlib import Path
|
| 5 |
from datetime import datetime
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
+
# -------------------------------
|
| 8 |
+
# Import internal modules
|
| 9 |
+
# -------------------------------
|
|
|
|
| 10 |
|
| 11 |
+
from read_audio_info import read_audio_info
|
| 12 |
+
from time_domain import compute_time_domain_stats
|
| 13 |
+
from spectral import compute_spectral_analysis
|
| 14 |
+
from loudness_calc import compute_loudness, LOUDNESS_AVAILABLE
|
| 15 |
+
from issue_detection import detect_audio_issues
|
| 16 |
+
from synthetic_detector import detect_synthetic_voice
|
| 17 |
+
from scoring import compute_quality_score
|
| 18 |
+
from report_generator import create_report
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
|
| 20 |
|
| 21 |
# ============================================================
|
| 22 |
# MAIN ANALYSIS FUNCTION
|
| 23 |
# ============================================================
|
| 24 |
|
| 25 |
+
def analyze_audio(audio_file, progress=gr.Progress()):
    """Gradio callback: run the full analysis pipeline on an uploaded file.

    The steps are, in order: read file metadata, load the waveform as mono
    at its native sample rate, compute time-domain and spectral statistics,
    measure loudness (only when ``LOUDNESS_AVAILABLE``), detect issues,
    estimate synthetic-voice probability, score the result, render the PNG
    report into ``reports/`` and build a Markdown summary.

    Args:
        audio_file: Path of the uploaded file, or ``None`` when nothing was
            uploaded.
        progress: Gradio progress tracker. The ``gr.Progress()`` default is
            how Gradio injects the tracker, so it is intentional.

    Returns:
        ``(png_path, markdown)``. ``png_path`` is ``None`` when no file was
        given or the analysis failed; in that case ``markdown`` carries the
        message to show.
    """
    # NOTE(review): several glyphs in the user-facing strings below look
    # mis-decoded (probably emoji / dashes). They are reproduced as found;
    # restore them from the original file if available.
    if audio_file is None:
        return None, "β οΈ Please upload an audio file."

    try:
        progress(0.1, desc="Reading audio file...")
        path = Path(audio_file)
        info = read_audio_info(str(path))

        progress(0.25, desc="Loading waveform...")
        y, sr = librosa.load(str(path), sr=None, mono=True)

        progress(0.35, desc="Analyzing time-domain...")
        time_stats = compute_time_domain_stats(y)

        progress(0.50, desc="Computing spectral analysis...")
        spectral = compute_spectral_analysis(y, sr)

        progress(0.60, desc="Computing loudness...")
        lufs = compute_loudness(y, sr) if LOUDNESS_AVAILABLE else None

        progress(0.70, desc="Detecting issues...")
        issues = detect_audio_issues(spectral, time_stats)

        # Informational only.
        progress(0.78, desc="Synthetic voice estimation...")
        synthetic = detect_synthetic_voice(y, sr, spectral)

        progress(0.82, desc="Scoring...")
        score = compute_quality_score(issues)

        # Render the PNG report.
        output_dir = Path("reports")
        output_dir.mkdir(exist_ok=True)
        output_file = output_dir / (path.stem + "_report.png")

        audio_data = {
            "filename": path.name,
            "info": info,
            "time_stats": time_stats,
            "spectral": spectral,
            "lufs": lufs,
            "issues": issues,
            "score": score,
            "synthetic": synthetic,
            "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        }

        progress(0.92, desc="Rendering PNG report...")
        create_report(audio_data, str(output_file))

        progress(1.0, desc="Done!")

        # Build the Markdown summary. The table template ends directly after
        # its last row (no blank line) so the optional LUFS row appended next
        # stays part of the table instead of rendering as loose text.
        md = f"""
# π΅ Analysis Complete β Audio Forensic Analyzer

## File Information
- **Filename:** `{audio_data['filename']}`
- **Duration:** {info['duration']:.2f}s
- **Sample Rate:** {info['samplerate']} Hz
- **Channels:** {info['channels']}

---

## π Quality Assessment
- **Score:** {score['score']}/100
- **Grade:** {score['grade']}
- **Quality:** {score['quality']}
- **Recommendation:** {score['recommendation']}

---

## π§ Time-Domain Stats
| Metric | Value |
|--------|--------|
| Peak Level | {time_stats['peak_db']:.2f} dBFS |
| RMS Level | {time_stats['rms_db']:.2f} dBFS |
| Crest Factor | {time_stats['crest_factor_db']:.2f} dB |
| SNR | {time_stats['snr_db']:.1f} dB |
| ZCR | {time_stats['zero_crossing_rate']:.4f} |
"""

        if lufs is not None:
            md += f"| Integrated LUFS | {lufs:.2f} LUFS |\n"

        md += f"""
---

## π Synthetic Voice Estimate (Informational Only)
- **Probability:** {synthetic['synthetic_probability']:.2f}
- **Label:** **{synthetic['synthetic_label']}**

---

## β οΈ Issues Detected: {len(issues)}
"""

        if issues:
            icons = {"CRITICAL": "π΄", "HIGH": "π ", "MEDIUM": "π‘", "LOW": "π’"}
            for issue, sev, desc in issues:
                md += f"- {icons.get(sev,'βͺ')} **[{sev}] {issue}** β {desc}\n"
        else:
            # This literal was split across two lines by a mis-decoded
            # character; the glyph is presumed to have been a check mark.
            md += "- ✅ No significant issues\n"

        md += f"""

---

π **Report PNG saved:** `{output_file.name}`

"""

        return str(output_file), md

    except Exception as e:
        # Top-level UI boundary: log the traceback and show the message to
        # the user rather than crashing the callback.
        import traceback
        traceback.print_exc()
        return None, f"# β Analysis Failed\n{str(e)}"
|
| 171 |
|
| 172 |
|
| 173 |
# ============================================================
|
| 174 |
+
# GRADIO UI
|
| 175 |
# ============================================================
|
| 176 |
|
| 177 |
+
# Introductory text shown at the top of the page.
_INTRO_MARKDOWN = """
# π§ Audio Forensic Analyzer
Upload an audio file and generate a **full forensic report**:
- HF/LF rolloff
- Filtering (LPF/HPF/Brickwall)
- Compression & clipping
- Noise reduction artifacts
- Spectral notches
- Loudness (LUFS)
- Synthetic Voice Probability (Informational Only)

Report includes a PNG + formatted summary.
"""

with gr.Blocks(title="Audio Forensic Analyzer", theme="soft") as demo:
    gr.Markdown(_INTRO_MARKDOWN)

    with gr.Row():
        # Left column: upload control and the trigger button.
        with gr.Column(scale=1):
            audio_input = gr.Audio(
                label="π Upload Audio",
                type="filepath",
                sources=["upload"],
            )
            analyze_button = gr.Button("π Analyze Audio", variant="primary")

        # Right column: rendered report image and the Markdown summary.
        with gr.Column(scale=2):
            png_output = gr.Image(
                label="π Forensic Report (PNG)",
                type="filepath",
                height=600,
            )
            summary_output = gr.Markdown(label="π Summary Report")

    # analyze_audio returns (png_path, markdown), matching the two outputs.
    analyze_button.click(
        fn=analyze_audio,
        inputs=[audio_input],
        outputs=[png_output, summary_output],
    )


# Run in HuggingFace Space
if __name__ == "__main__":
    _launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "show_error": True,
    }
    demo.launch(**_launch_options)
|