Spaces:

Mr7Explorer
/

audio_analyzer

Sleeping

App Files Files Community

Mr7Explorer committed on Dec 10, 2025

Commit

b72f14c

verified ·

1 Parent(s): 391be2d

Create spectral.py

Browse files

Files changed (1) hide show

spectral.py +150 -0

spectral.py ADDED Viewed

	@@ -0,0 +1,150 @@

+# spectral.py
+# ============================================================
+# Spectral Analysis Module for Audio Forensic Analyzer
+# Logic preserved exactly from original app.py (cleaned + modular)
+# ============================================================
+import numpy as np
+import librosa
+import scipy.signal as sps
+def compute_spectral_analysis(y, sr, n_fft=4096):
+    """Comprehensive spectral analysis tuned for speech QC."""
+    hop_length = n_fft // 4
+    # ============================================================
+    # STFT → Magnitude + dB Conversion
+    # ============================================================
+    S = np.abs(librosa.stft(
+        y,
+        n_fft=n_fft,
+        hop_length=hop_length,
+        window="hann"
+    ))
+    freqs = np.linspace(0, sr / 2, S.shape[0])
+    # Convert amplitude to dB scale
+    S_db = librosa.amplitude_to_db(S, ref=np.max)
+    # ============================================================
+    # 90th Percentile Energy Envelope (Major Improvement)
+    # ============================================================
+    S_power = S ** 2
+    energy = np.percentile(S_power, 90, axis=1) + 1e-20
+    total_energy = float(np.sum(energy))
+    cum_energy = np.cumsum(energy)
+    roll85_idx = np.searchsorted(cum_energy, 0.85 * total_energy)
+    roll95_idx = np.searchsorted(cum_energy, 0.95 * total_energy)
+    freq_at_85 = float(freqs[min(roll85_idx, len(freqs) - 1)])
+    freq_at_95 = float(freqs[min(roll95_idx, len(freqs) - 1)])
+    # ============================================================
+    # Updated HF Envelope: 90th percentile of dB
+    # ============================================================
+    mean_db_per_bin = np.percentile(S_db, 90, axis=1)
+    peak_db = float(np.max(S_db))
+    threshold_db = peak_db - 60
+    non_silent_bins = np.where(mean_db_per_bin > threshold_db)[0]
+    highest_freq = float(freqs[non_silent_bins[-1]]) if non_silent_bins.size else 0.0
+    # ============================================================
+    # Speech-Centric Band Energy Distribution
+    # ============================================================
+    def band_energy(low, high):
+        i1 = np.searchsorted(freqs, low)
+        i2 = np.searchsorted(freqs, high)
+        return float(100 * np.sum(energy[i1:i2]) / total_energy)
+    def band_energy_above(f):
+        idx = np.searchsorted(freqs, f)
+        return float(100 * np.sum(energy[idx:]) / total_energy)
+    energy_stats = {
+        "below_100hz": band_energy(0, 100),
+        "100_500hz": band_energy(100, 500),
+        "500_2khz": band_energy(500, 2000),
+        "2k_8khz": band_energy(2000, 8000),
+        "8k_12khz": band_energy(8000, 12000),
+        "12k_16khz": band_energy(12000, 16000),
+        "above_16khz": band_energy_above(16000)
+    }
+    # ============================================================
+    # Brick-wall Detection
+    # ============================================================
+    diffs = np.diff(mean_db_per_bin)
+    big_drop_idx = np.where(diffs < -20)[0]
+    brick_wall = bool(big_drop_idx.size)
+    brick_freq = float(freqs[big_drop_idx[0]]) if big_drop_idx.size else None
+    # ============================================================
+    # Spectral Notch Detection (Median-filtering)
+    # ============================================================
+    smooth = sps.medfilt(mean_db_per_bin, kernel_size=9)
+    minima = sps.argrelextrema(smooth, np.less)[0]
+    notches = []
+    for m in minima:
+        left = smooth[max(0, m - 6):m]
+        right = smooth[m + 1:min(len(smooth), m + 7)]
+        neighbor_peak = max(
+            left.max() if left.size else -999,
+            right.max() if right.size else -999
+        )
+        depth = neighbor_peak - smooth[m]
+        if depth >= 15 and freqs[m] > 100:
+            notches.append({
+                "freq": float(freqs[m]),
+                "depth_db": float(depth)
+            })
+    # ============================================================
+    # Additional Spectral Descriptors
+    # ============================================================
+    centroid = float(np.mean(librosa.feature.spectral_centroid(S=S, sr=sr)))
+    bandwidth = float(np.mean(librosa.feature.spectral_bandwidth(S=S, sr=sr)))
+    flatness = float(np.mean(librosa.feature.spectral_flatness(S=S)))
+    rolloff = float(np.mean(librosa.feature.spectral_rolloff(S=S, sr=sr)))
+    # ============================================================
+    # FINAL RETURN STRUCTURE
+    # (Matches original format exactly)
+    # ============================================================
+    return {
+        "S_db": S_db,
+        "freqs": freqs,
+        "hop_length": hop_length,
+        "n_fft": n_fft,
+        "rolloff_85pct": freq_at_85,
+        "rolloff_95pct": freq_at_95,
+        "highest_freq_minus60db": highest_freq,
+        "energy_distribution": energy_stats,
+        "brick_wall_detected": brick_wall,
+        "brick_wall_freq": brick_freq,
+        "spectral_notches": notches,
+        "spectral_centroid": centroid,
+        "spectral_bandwidth": bandwidth,
+        "spectral_flatness": flatness,
+        "spectral_rolloff": rolloff,
+        # Added envelopes for downstream detectors (unchanged logic)
+        "hf_env": mean_db_per_bin,
+        "lf_env": mean_db_per_bin[:200] if len(mean_db_per_bin) > 200 else mean_db_per_bin
+    }