Spaces:

Mr7Explorer
/

audio_analyzer

Sleeping

App Files Community

Mr7Explorer committed on Dec 9, 2025

Commit

ee0d393

verified ·

1 Parent(s): de309bd

Update app.py

Browse files

Files changed (1) hide show

app.py +29 -25

app.py CHANGED Viewed

@@ -23,7 +23,6 @@ except ImportError:
 # ==================== ANALYSIS FUNCTIONS ====================
 def read_audio_info(path):
-    """Read audio file metadata"""
     info = sf.info(path)
     return {
         "samplerate": int(info.samplerate),
@@ -36,7 +35,6 @@ def read_audio_info(path):
 def compute_time_domain_stats(y):
-    """Calculate time-domain statistics"""
     peak = float(np.max(np.abs(y)))
     rms = float(np.sqrt(np.mean(y**2)))
@@ -62,36 +60,43 @@ def compute_time_domain_stats(y):
 def compute_spectral_analysis(y, sr, n_fft=8192):
-    """Comprehensive spectral analysis"""
     hop_length = n_fft // 4
     S = np.abs(librosa.stft(y, n_fft=n_fft, hop_length=hop_length, window='hann'))
     freqs = np.linspace(0, sr/2, S.shape[0])
     S_db = librosa.amplitude_to_db(S, ref=np.max)
-    energy = np.sum(S**2, axis=1) + 1e-20
     total_energy = float(np.sum(energy))
     cum_energy = np.cumsum(energy)
     roll85_idx = np.searchsorted(cum_energy, 0.85 * total_energy)
     roll95_idx = np.searchsorted(cum_energy, 0.95 * total_energy)
     freq_at_85 = float(freqs[min(roll85_idx, len(freqs)-1)])
     freq_at_95 = float(freqs[min(roll95_idx, len(freqs)-1)])
-    mean_db_per_bin = np.mean(S_db, axis=1)
     peak_db = float(np.max(S_db))
     threshold_db = peak_db - 60.0
     non_silent_bins = np.where(mean_db_per_bin > threshold_db)[0]
     highest_freq = float(freqs[non_silent_bins[-1]]) if non_silent_bins.size else 0.0
     def energy_above(f):
         idx = np.searchsorted(freqs, f)
         return float(100.0 * np.sum(energy[idx:]) / total_energy)
     def energy_below(f):
         idx = np.searchsorted(freqs, f)
         return float(100.0 * np.sum(energy[:idx]) / total_energy)
     energy_stats = {
         "below_100hz": energy_below(100),
         "below_200hz": energy_below(200),
@@ -102,12 +107,14 @@ def compute_spectral_analysis(y, sr, n_fft=8192):
         "above_12khz": energy_above(12000),
         "above_16khz": energy_above(16000),
     }
     diffs = np.diff(mean_db_per_bin)
     big_drop_idx = np.where(diffs < -20.0)[0]
     brick_wall = bool(big_drop_idx.size)
     brick_freq = float(freqs[big_drop_idx[0]]) if big_drop_idx.size else None
     smooth = sps.medfilt(mean_db_per_bin, kernel_size=9)
     minima = sps.argrelextrema(smooth, np.less)[0]
     notches = []
@@ -121,12 +128,12 @@ def compute_spectral_analysis(y, sr, n_fft=8192):
         depth = neighbors_peak - smooth[m]
         if depth >= 15.0 and freqs[m] > 100:
             notches.append({"freq": float(freqs[m]), "depth_db": float(depth)})
     centroid = float(np.mean(librosa.feature.spectral_centroid(S=S, sr=sr)))
     bandwidth = float(np.mean(librosa.feature.spectral_bandwidth(S=S, sr=sr)))
     flatness = float(np.mean(librosa.feature.spectral_flatness(S=S)))
     rolloff = float(np.mean(librosa.feature.spectral_rolloff(S=S, sr=sr)))
     return {
         "S_db": S_db,
         "freqs": freqs,
@@ -147,7 +154,6 @@ def compute_spectral_analysis(y, sr, n_fft=8192):
 def compute_loudness(y, sr):
-    """Compute integrated loudness (LUFS)"""
     if not LOUDNESS_AVAILABLE:
         return None
     try:
@@ -159,7 +165,6 @@ def compute_loudness(y, sr):
 def detect_audio_issues(spectral, time_stats):
-    """Detect common audio processing artifacts"""
     issues = []
     energy = spectral["energy_distribution"]
@@ -169,35 +174,34 @@ def detect_audio_issues(spectral, time_stats):
     elif energy["below_200hz"] < 5.0:
         issues.append(("HIGH_PASS_FILTER", "MEDIUM",
                       f"Low energy below 200Hz ({energy['below_200hz']:.2f}%). Possible mild HPF."))
     if energy["above_12khz"] < 0.2 and spectral["highest_freq_minus60db"] < 12000:
         issues.append(("HF_LOSS", "HIGH",
                       f"Severe HF loss. Only {energy['above_12khz']:.3f}% above 12kHz."))
     elif energy["above_12khz"] < 1.0:
         issues.append(("HF_LOSS", "MEDIUM",
                       f"Reduced HF content ({energy['above_12khz']:.2f}% above 12kHz)."))
     if spectral["brick_wall_detected"]:
         issues.append(("BRICK_WALL", "HIGH",
                       f"Brick-wall filter at {spectral['brick_wall_freq']:.0f}Hz."))
     if len(spectral["spectral_notches"]) > 0:
         issues.append(("SPECTRAL_NOTCHES", "MEDIUM",
                       f"{len(spectral['spectral_notches'])} spectral notches detected."))
     if time_stats["crest_factor_db"] < 3.0:
         issues.append(("OVER_COMPRESSION", "HIGH",
                       f"Very low crest factor ({time_stats['crest_factor_db']:.1f}dB). Heavy compression."))
     elif time_stats["crest_factor_db"] < 6.0:
         issues.append(("COMPRESSION", "MEDIUM",
                       f"Low crest factor ({time_stats['crest_factor_db']:.1f}dB). Moderate compression."))
     if time_stats["peak"] >= 0.999:
         issues.append(("CLIPPING", "CRITICAL",
                       f"Peak at {time_stats['peak']:.6f}. Possible digital clipping!"))
-    return issues
 def create_report(audio_data, output_path):
     """Create comprehensive PNG report"""

 # ==================== ANALYSIS FUNCTIONS ====================
 def read_audio_info(path):
     info = sf.info(path)
     return {
         "samplerate": int(info.samplerate),
 def compute_time_domain_stats(y):
     peak = float(np.max(np.abs(y)))
     rms = float(np.sqrt(np.mean(y**2)))
 def compute_spectral_analysis(y, sr, n_fft=8192):
     hop_length = n_fft // 4
+    # STFT
     S = np.abs(librosa.stft(y, n_fft=n_fft, hop_length=hop_length, window='hann'))
     freqs = np.linspace(0, sr/2, S.shape[0])
+    # dB matrix
     S_db = librosa.amplitude_to_db(S, ref=np.max)
+    # ===== HYBRID FIX: Percentile-Based Energy =====
+    S_power = S**2
+    energy = np.percentile(S_power, 75, axis=1) + 1e-20
     total_energy = float(np.sum(energy))
     cum_energy = np.cumsum(energy)
     roll85_idx = np.searchsorted(cum_energy, 0.85 * total_energy)
     roll95_idx = np.searchsorted(cum_energy, 0.95 * total_energy)
     freq_at_85 = float(freqs[min(roll85_idx, len(freqs)-1)])
     freq_at_95 = float(freqs[min(roll95_idx, len(freqs)-1)])
+    # ===== HYBRID FIX: 90th percentile dB (instead of mean) =====
+    mean_db_per_bin = np.percentile(S_db, 90, axis=1)
     peak_db = float(np.max(S_db))
     threshold_db = peak_db - 60.0
     non_silent_bins = np.where(mean_db_per_bin > threshold_db)[0]
     highest_freq = float(freqs[non_silent_bins[-1]]) if non_silent_bins.size else 0.0
+    # Energy band functions
     def energy_above(f):
         idx = np.searchsorted(freqs, f)
         return float(100.0 * np.sum(energy[idx:]) / total_energy)
     def energy_below(f):
         idx = np.searchsorted(freqs, f)
         return float(100.0 * np.sum(energy[:idx]) / total_energy)
     energy_stats = {
         "below_100hz": energy_below(100),
         "below_200hz": energy_below(200),
         "above_12khz": energy_above(12000),
         "above_16khz": energy_above(16000),
     }
+    # Brick-wall detection using new percentile spectrum
     diffs = np.diff(mean_db_per_bin)
     big_drop_idx = np.where(diffs < -20.0)[0]
     brick_wall = bool(big_drop_idx.size)
     brick_freq = float(freqs[big_drop_idx[0]]) if big_drop_idx.size else None
+    # Spectral notches (unchanged, but uses new mean_db_per_bin)
     smooth = sps.medfilt(mean_db_per_bin, kernel_size=9)
     minima = sps.argrelextrema(smooth, np.less)[0]
     notches = []
         depth = neighbors_peak - smooth[m]
         if depth >= 15.0 and freqs[m] > 100:
             notches.append({"freq": float(freqs[m]), "depth_db": float(depth)})
     centroid = float(np.mean(librosa.feature.spectral_centroid(S=S, sr=sr)))
     bandwidth = float(np.mean(librosa.feature.spectral_bandwidth(S=S, sr=sr)))
     flatness = float(np.mean(librosa.feature.spectral_flatness(S=S)))
     rolloff = float(np.mean(librosa.feature.spectral_rolloff(S=S, sr=sr)))
     return {
         "S_db": S_db,
         "freqs": freqs,
 def compute_loudness(y, sr):
     if not LOUDNESS_AVAILABLE:
         return None
     try:
 def detect_audio_issues(spectral, time_stats):
     issues = []
     energy = spectral["energy_distribution"]
     elif energy["below_200hz"] < 5.0:
         issues.append(("HIGH_PASS_FILTER", "MEDIUM",
                       f"Low energy below 200Hz ({energy['below_200hz']:.2f}%). Possible mild HPF."))
     if energy["above_12khz"] < 0.2 and spectral["highest_freq_minus60db"] < 12000:
         issues.append(("HF_LOSS", "HIGH",
                       f"Severe HF loss. Only {energy['above_12khz']:.3f}% above 12kHz."))
     elif energy["above_12khz"] < 1.0:
         issues.append(("HF_LOSS", "MEDIUM",
                       f"Reduced HF content ({energy['above_12khz']:.2f}% above 12kHz)."))
     if spectral["brick_wall_detected"]:
         issues.append(("BRICK_WALL", "HIGH",
                       f"Brick-wall filter at {spectral['brick_wall_freq']:.0f}Hz."))
     if len(spectral["spectral_notches"]) > 0:
         issues.append(("SPECTRAL_NOTCHES", "MEDIUM",
                       f"{len(spectral['spectral_notches'])} spectral notches detected."))
     if time_stats["crest_factor_db"] < 3.0:
         issues.append(("OVER_COMPRESSION", "HIGH",
                       f"Very low crest factor ({time_stats['crest_factor_db']:.1f}dB). Heavy compression."))
     elif time_stats["crest_factor_db"] < 6.0:
         issues.append(("COMPRESSION", "MEDIUM",
                       f"Low crest factor ({time_stats['crest_factor_db']:.1f}dB). Moderate compression."))
     if time_stats["peak"] >= 0.999:
         issues.append(("CLIPPING", "CRITICAL",
                       f"Peak at {time_stats['peak']:.6f}. Possible digital clipping!"))
+    return issues
 def create_report(audio_data, output_path):
     """Create comprehensive PNG report"""