Spaces:

Mr7Explorer
/

audio_analyzer

Sleeping

App Files Files Community

Mr7Explorer committed on Dec 10, 2025

Commit

d2789be

verified ·

1 Parent(s): 3d56a8d

Update app.py

Browse files

Files changed (1) hide show

app.py +299 -67

app.py CHANGED Viewed

@@ -183,92 +183,262 @@ def compute_loudness(y, sr):
         return None
 # ============================================================
-# UPDATED ISSUE DETECTION (HF thresholds corrected)
 # ============================================================
 def detect_audio_issues(spectral, time_stats):
-    """Detect common audio processing artifacts"""
     issues = []
     energy = spectral["energy_distribution"]
-    # ============================
-    # UPDATED HF LOSS LOGIC
-    # ============================
     hf_8_12 = energy["8k_12khz"]
     highest_freq = spectral["highest_freq_minus60db"]
-    # 1️⃣ Severe HF cutoff (actual filtering / NR damage)
     if hf_8_12 < 0.01 and highest_freq < 9000:
         issues.append((
             "HF_LOSS", "HIGH",
-            f"Possible HF cutoff: only {hf_8_12:.3f}% in 8–12kHz and rolloff at {highest_freq:.1f} Hz."
         ))
-    # 2️⃣ Low HF energy — common in normal speech
     elif hf_8_12 < 0.02:
         issues.append((
             "HF_LOSS", "LOW",
-            f"Low HF energy ({hf_8_12:.3f}% in 8–12kHz). Normal for speech."
         ))
-    # ============================
-    # High-pass filter check
-    # ============================
-    if energy["below_100hz"] < 0.5:
         issues.append((
-            "HIGH_PASS_FILTER",
-            "HIGH",
-            f"Very low energy <100Hz ({energy['below_100hz']:.2f}%). Possible HPF."
         ))
-    # ============================
-    # Brick-wall filter detection
-    # ============================
     if spectral["brick_wall_detected"]:
         issues.append((
-            "BRICK_WALL",
-            "HIGH",
-            f"Brick-wall behavior detected at {spectral['brick_wall_freq']:.0f} Hz."
         ))
-    # ============================
-    # Spectral notches
-    # ============================
-    if len(spectral["spectral_notches"]) > 0:
-        issues.append((
-            "SPECTRAL_NOTCHES",
-            "MEDIUM",
-            f"{len(spectral['spectral_notches'])} spectral notches found."
-        ))
-    # ============================
-    # Compression / dynamics
-    # ============================
-    if time_stats["crest_factor_db"] < 3:
         issues.append((
-            "OVER_COMPRESSION",
-            "HIGH",
-            f"Very low crest factor ({time_stats['crest_factor_db']:.1f} dB)."
         ))
-    elif time_stats["crest_factor_db"] < 6:
         issues.append((
-            "COMPRESSION",
-            "MEDIUM",
-            f"Low crest factor ({time_stats['crest_factor_db']:.1f} dB)."
         ))
-    # ============================
-    # Clipping
-    # ============================
     if time_stats["peak"] >= 0.999:
         issues.append((
-            "CLIPPING",
-            "CRITICAL",
             f"Peak amplitude {time_stats['peak']:.6f}. Possible clipping."
         ))
-    # FINAL RETURN — MUST BE INDENTED INSIDE FUNCTION
     return issues
 # ============================================================
@@ -501,7 +671,7 @@ def create_report(audio_data, output_path):
     # ============================
-    # ISSUES PANEL
     # ============================
     ax_issues = fig.add_subplot(gs[3, 0:3])
@@ -514,9 +684,12 @@ def create_report(audio_data, output_path):
         "═" * 80
     ]
     if not issues:
         issue_lines.append("✅ No significant issues detected.")
     else:
         severity_icons = {
             "CRITICAL": "🔴 CRITICAL",
             "HIGH": "🟠 HIGH",
@@ -524,29 +697,48 @@ def create_report(audio_data, output_path):
             "LOW": "🟢 LOW"
         }
         for issue_type, severity, description in issues:
             icon = severity_icons.get(severity, "⚪ INFO")
             issue_lines.append(f"\n{icon} — {issue_type}")
             issue_lines.append(f"  → {description}")
-    # If spectral notches exist, list them
     if spec["spectral_notches"]:
-        issue_lines.append(f"\n🎵 SPECTRAL NOTCHES DETECTED: {len(spec['spectral_notches'])}")
         for i, notch in enumerate(spec["spectral_notches"][:5], start=1):
             issue_lines.append(
-                f"  {i}. Frequency: {notch['freq']:.1f} Hz, Depth: {notch['depth_db']:.1f} dB"
             )
         if len(spec["spectral_notches"]) > 5:
-            issue_lines.append(f"  ... and {len(spec['spectral_notches']) - 5} more")
-    # Brickwall detection notice
     if spec["brick_wall_detected"]:
-        issue_lines.append(f"\n⚠️ BRICK-WALL FILTER: Detected at {spec['brick_wall_freq']:.0f} Hz")
     issues_text = "\n".join(issue_lines)
     ax_issues.text(
-        0.05, 0.95, issues_text,
         transform=ax_issues.transAxes,
         fontsize=11,
         verticalalignment="top",
@@ -558,8 +750,9 @@ def create_report(audio_data, output_path):
             linewidth=2
         )
     )
     # ============================
-    # QUALITY SCORE PANEL
     # ============================
     ax_score = fig.add_subplot(gs[3, 3])
@@ -567,28 +760,59 @@ def create_report(audio_data, output_path):
     issues = audio_data["issues"]
-    # Score penalties
     critical = sum(1 for _, sev, _ in issues if sev == "CRITICAL")
     high = sum(1 for _, sev, _ in issues if sev == "HIGH")
     medium = sum(1 for _, sev, _ in issues if sev == "MEDIUM")
     score = 100
-    score -= critical * 30
-    score -= high * 15
-    score -= medium * 5
-    score = max(0, score)
-    # Grade + Color
     if score >= 90:
         grade, quality, color = "A", "EXCELLENT", "#00C853"
     elif score >= 75:
         grade, quality, color = "B", "GOOD", "#64DD17"
     elif score >= 60:
         grade, quality, color = "C", "FAIR", "#FFD600"
     elif score >= 40:
         grade, quality, color = "D", "POOR", "#FF6D00"
     else:
         grade, quality, color = "F", "CRITICAL", "#D50000"
     score_lines = [
         "QUALITY ASSESSMENT",
@@ -598,11 +822,21 @@ def create_report(audio_data, output_path):
         f"GRADE: {grade}",
         f"QUALITY: {quality}",
         "",
         "ISSUES SUMMARY",
         "─" * 28,
         f"🔴 Critical: {critical}",
         f"🟠 High:     {high}",
         f"🟡 Medium:   {medium}",
         "",
         "─" * 28,
         "Generated:",
@@ -628,10 +862,7 @@ def create_report(audio_data, output_path):
         fontweight="bold"
     )
-    # ============================
     # SAVE REPORT
-    # ============================
     plt.savefig(
         output_path,
         dpi=300,
@@ -642,6 +873,7 @@ def create_report(audio_data, output_path):
     plt.close()
     return output_path
 # ============================================================
 # MAIN ANALYSIS FUNCTION (GRADIO CALLBACK)
 # ============================================================

         return None
 # ============================================================
+# ADVANCED ISSUE DETECTION ENGINE
+# Includes: HF-loss logic, LPF detector, HPF detector,
+# NR artifacts, spectral anomalies, compression, clipping
 # ============================================================
 def _as_envelope(env, freqs):
     """Return *env* as a float array aligned with *freqs*, or None if unusable."""
     if env is None or freqs is None:
         return None
     env = np.asarray(env, dtype=float)
     # A boolean frequency mask can only index an envelope of the same shape.
     return env if env.shape == freqs.shape else None


 def _slope_per_octave(freq, vals):
     """Return the least-squares slope of *vals* per doubling of *freq*.

     Fitting against log2(frequency) is what makes the result "per octave".
     Callers only pass strictly positive frequencies, so log2 is defined.
     """
     return float(np.polyfit(np.log2(freq), vals, 1)[0])


 def _band_crest(env, band):
     """Return max minus mean of *env* inside the boolean mask *band*, or None if empty."""
     vals = env[band]
     if vals.size == 0:
         return None
     return float(np.max(vals) - np.mean(vals))


 def detect_audio_issues(spectral, time_stats):
     """Detect audio processing artifacts from spectral and time-domain statistics.

     Args:
         spectral: Mapping of spectral measurements. Required keys:
             "energy_distribution" (with "8k_12khz" and "below_100hz"),
             "highest_freq_minus60db" and "brick_wall_detected"
             ("brick_wall_freq" is read only when that flag is truthy).
             Optional keys: "freqs", "hf_env", "lf_env",
             "spectral_flatness", "spectral_notches". Envelope-based
             detectors are skipped when "freqs" or the envelope is missing
             or their shapes differ.
         time_stats: Mapping with "crest_factor_db" and "peak".

     Returns:
         A list of ``(issue_type, severity, description)`` tuples, in
         detector order. Severity is one of "CRITICAL", "HIGH", "MEDIUM",
         "LOW". The list is empty when nothing is flagged.
     """
     issues = []
     energy = spectral["energy_distribution"]
     notches = spectral.get("spectral_notches", [])
     flatness = spectral.get("spectral_flatness")

     freqs = spectral.get("freqs")
     if freqs is not None:
         freqs = np.asarray(freqs, dtype=float)
     hf_env = _as_envelope(spectral.get("hf_env"), freqs)
     lf_env = _as_envelope(spectral.get("lf_env"), freqs)

     # ------------------------------------------------------------
     # 1. HF loss (speech-safe thresholds)
     # ------------------------------------------------------------
     hf_8_12 = energy["8k_12khz"]
     highest_freq = spectral["highest_freq_minus60db"]
     if hf_8_12 < 0.01 and highest_freq < 9000:
         issues.append((
             "HF_LOSS", "HIGH",
             f"Severe HF cutoff: {hf_8_12:.3f}% in 8–12k and rolloff at {highest_freq:.1f} Hz."
         ))
     elif hf_8_12 < 0.02:
         issues.append((
             "HF_LOSS", "LOW",
             f"Low HF energy ({hf_8_12:.3f}%). Normal for speech."
         ))

     # ------------------------------------------------------------
     # 2. Low-pass filter detector
     # ------------------------------------------------------------
     if hf_env is not None:
         hf_region = (freqs >= 5000) & (freqs <= 12000)
         if np.count_nonzero(hf_region) > 10:
             # NOTE(review): the messages label this dB/oct, which assumes
             # the envelope is expressed in dB -- confirm against the producer.
             slope_db_oct = _slope_per_octave(freqs[hf_region], hf_env[hf_region])
             if highest_freq < 10000:
                 issues.append((
                     "LPF_DETECTED", "HIGH",
                     f"Low-pass filter near {highest_freq:.0f} Hz."
                 ))
             elif slope_db_oct < -6:
                 issues.append((
                     "HF_EQ_SHELF", "LOW",
                     f"HF rolloff detected (~{slope_db_oct:.1f} dB/oct)."
                 ))

     # ------------------------------------------------------------
     # 3. High-pass filter detector
     # ------------------------------------------------------------
     if lf_env is not None:
         low_region = (freqs >= 20) & (freqs <= 300)
         if np.count_nonzero(low_region) > 10 and energy["below_100hz"] < 0.5:
             slope_db_oct_l = _slope_per_octave(freqs[low_region], lf_env[low_region])
             if slope_db_oct_l > 6:
                 issues.append((
                     "HPF_DETECTED", "HIGH",
                     f"High-pass filter detected (~{slope_db_oct_l:.1f} dB/oct)."
                 ))
             else:
                 issues.append((
                     "HPF_SUSPECTED", "LOW",
                     "Possible mild HPF (LF rolloff)."
                 ))

     # ------------------------------------------------------------
     # 4. Noise-reduction artifact detector
     # ------------------------------------------------------------
     if flatness is not None:
         # Mean of the last 20 entries; atleast_1d tolerates a scalar value.
         flat_tail = np.atleast_1d(flatness)[-20:]
         if flat_tail.size:
             hf_flat = float(np.mean(flat_tail))
             if hf_flat > 0.40 and len(notches) >= 3:
                 issues.append((
                     "NOISE_REDUCTION_ARTIFACTS", "HIGH",
                     f"NR artifacts: HF flattening ({hf_flat:.2f}) + {len(notches)} notches."
                 ))
             elif hf_flat > 0.35:
                 issues.append((
                     "NR_SOFT", "LOW",
                     f"Mild noise reduction detected (HF flattening={hf_flat:.2f})."
                 ))

     # ------------------------------------------------------------
     # 5. Spectral notches
     # ------------------------------------------------------------
     if len(notches) > 0:
         issues.append((
             "SPECTRAL_NOTCHES", "MEDIUM",
             f"{len(notches)} spectral notches detected."
         ))

     # ------------------------------------------------------------
     # 6. Brick-wall low-pass
     # ------------------------------------------------------------
     if spectral["brick_wall_detected"]:
         issues.append((
             "BRICK_WALL", "HIGH",
             f"Brick-wall behavior at {spectral['brick_wall_freq']:.0f} Hz."
         ))

     # ------------------------------------------------------------
     # 7. Compression / dynamics
     # ------------------------------------------------------------
     crest = time_stats["crest_factor_db"]
     if crest < 3:
         issues.append((
             "OVER_COMPRESSION", "HIGH",
             f"Very low crest factor ({crest:.1f} dB)."
         ))
     elif crest < 6:
         issues.append((
             "COMPRESSION", "MEDIUM",
             f"Moderate compression ({crest:.1f} dB)."
         ))

     # ------------------------------------------------------------
     # 8. Clipping
     # ------------------------------------------------------------
     if time_stats["peak"] >= 0.999:
         issues.append((
             "CLIPPING", "CRITICAL",
             f"Peak amplitude {time_stats['peak']:.6f}. Possible clipping."
         ))

     # Detectors 9-11 all read hf_env, so they share one guard. Previously the
     # de-esser comparison ran even when hf_env was missing (NameError), and
     # detectors 10-11 were dedented out of the function body.
     if hf_env is not None:
         # --------------------------------------------------------
         # 9. De-esser detector (presence band vs. sibilance band)
         # --------------------------------------------------------
         band_3_6k = (freqs >= 3000) & (freqs <= 6000)
         band_6_10k = (freqs >= 6000) & (freqs <= 10000)
         if band_3_6k.any() and band_6_10k.any():
             presence_energy = np.mean(hf_env[band_3_6k])
             sibilance_energy = np.mean(hf_env[band_6_10k])
             if sibilance_energy < (presence_energy * 0.20):
                 issues.append((
                     "DE_ESSER_DETECTED", "MEDIUM",
                     "Sibilance band (6–10 kHz) strongly reduced relative to presence band (3–6 kHz). Possible de-essing."
                 ))

         # --------------------------------------------------------
         # 10. Multiband compression detector
         # --------------------------------------------------------
         cf_lf = _band_crest(hf_env, (freqs >= 80) & (freqs <= 300))
         cf_mf = _band_crest(hf_env, (freqs >= 300) & (freqs <= 3000))
         cf_hf = _band_crest(hf_env, (freqs >= 3000) & (freqs <= 8000))
         if cf_lf is not None and cf_mf is not None and cf_hf is not None:
             if cf_hf < (cf_lf * 0.4):
                 issues.append((
                     "MULTIBAND_COMPRESSION", "MEDIUM",
                     "HF crest factor significantly lower than LF. Possible multiband compression."
                 ))
             if cf_mf < (cf_lf * 0.5):
                 issues.append((
                     "MULTIBAND_COMPRESSION", "LOW",
                     "Mid-band crest factor unusually compressed vs LF."
                 ))

         # --------------------------------------------------------
         # 11. EQ curve classifier
         # --------------------------------------------------------
         # A quadratic fit needs at least three points to be determined.
         if freqs.size >= 3:
             # Median-smooth the envelope before fitting.
             smooth = sps.medfilt(hf_env, kernel_size=9)
             tilt = np.polyfit(freqs, smooth, 1)[0]
             curvature = np.polyfit(freqs, smooth, 2)[0]
             if tilt > 0.00002:
                 issues.append((
                     "EQ_HF_BOOST", "LOW",
                     "HF shelf boost detected (positive spectral tilt)."
                 ))
             elif tilt < -0.00002:
                 issues.append((
                     "EQ_HF_CUT", "LOW",
                     "HF shelf cut detected (negative spectral tilt)."
                 ))
             if curvature > 1e-12:
                 issues.append((
                     "EQ_PEAKING", "LOW",
                     "Spectral curvature indicates possible midrange peaking EQ."
                 ))
             if abs(tilt) > 0.00001 and abs(curvature) < 1e-12:
                 issues.append((
                     "EQ_TILT", "LOW",
                     "Tilt EQ detected (linear upward/downward spectral tilt)."
                 ))

     return issues
 # ============================================================
     # ============================
+    # ISSUES PANEL (UPDATED)
     # ============================
     ax_issues = fig.add_subplot(gs[3, 0:3])
         "═" * 80
     ]
+    # No issues
     if not issues:
         issue_lines.append("✅ No significant issues detected.")
     else:
+        # Updated severity mapping
         severity_icons = {
             "CRITICAL": "🔴 CRITICAL",
             "HIGH": "🟠 HIGH",
             "LOW": "🟢 LOW"
         }
+        # Dynamic issue listing (supports all new detectors)
         for issue_type, severity, description in issues:
             icon = severity_icons.get(severity, "⚪ INFO")
             issue_lines.append(f"\n{icon} — {issue_type}")
             issue_lines.append(f"  → {description}")
+    # ============================
+    # SPECTRAL NOTCH DETAILS
+    # ============================
     if spec["spectral_notches"]:
+        issue_lines.append("\n🎵 SPECTRAL NOTCHES DETECTED:")
+        issue_lines.append(f"  Total: {len(spec['spectral_notches'])}")
         for i, notch in enumerate(spec["spectral_notches"][:5], start=1):
             issue_lines.append(
+                f"    {i}. {notch['freq']:.1f} Hz  (Depth: {notch['depth_db']:.1f} dB)"
             )
         if len(spec["spectral_notches"]) > 5:
+            issue_lines.append(
+                f"    ... and {len(spec['spectral_notches']) - 5} more notches"
+            )
+    # ============================
+    # BRICK-WALL FILTER NOTICE
+    # ============================
     if spec["brick_wall_detected"]:
+        issue_lines.append(
+            f"\n⚠️ BRICK-WALL FILTER DETECTED at {spec['brick_wall_freq']:.0f} Hz"
+        )
+    # ==================================================================
+    # FINAL OUTPUT
+    # ==================================================================
     issues_text = "\n".join(issue_lines)
     ax_issues.text(
+        0.05, 0.95,
+        issues_text,
         transform=ax_issues.transAxes,
         fontsize=11,
         verticalalignment="top",
             linewidth=2
         )
     )
     # ============================
+    # QUALITY SCORE PANEL (UPDATED)
     # ============================
     ax_score = fig.add_subplot(gs[3, 3])
     issues = audio_data["issues"]
+    # Separate counts by severity
     critical = sum(1 for _, sev, _ in issues if sev == "CRITICAL")
     high = sum(1 for _, sev, _ in issues if sev == "HIGH")
     medium = sum(1 for _, sev, _ in issues if sev == "MEDIUM")
+    low = sum(1 for _, sev, _ in issues if sev == "LOW")
+    # --------------------------------------------
+    # NEW: Weighted scoring model
+    # --------------------------------------------
     score = 100
+    score -= critical * 35        # Hard-damage issues
+    score -= high * 20            # Major processing
+    score -= medium * 8           # Subtle but relevant
+    score -= low * 3              # Minor processing
+    # Additional penalties for heavy processing
+    if len(issues) >= 6:
+        score -= 10
+    if (critical + high) >= 3:
+        score -= 10
+    # Bonus for clean files
+    if len(issues) == 0:
+        score += 5
+    score = max(0, min(score, 100))
+    # --------------------------------------------
+    # GRADE + COLOR MAPPING
+    # --------------------------------------------
     if score >= 90:
         grade, quality, color = "A", "EXCELLENT", "#00C853"
+        recommendation = "Excellent for TTS dataset"
     elif score >= 75:
         grade, quality, color = "B", "GOOD", "#64DD17"
+        recommendation = "Very good quality; suitable for TTS"
     elif score >= 60:
         grade, quality, color = "C", "FAIR", "#FFD600"
+        recommendation = "Usable but may contain processing artifacts"
     elif score >= 40:
         grade, quality, color = "D", "POOR", "#FF6D00"
+        recommendation = "Not recommended for TTS (heavy processing)"
     else:
         grade, quality, color = "F", "CRITICAL", "#D50000"
+        recommendation = "Severely degraded or processed; avoid for TTS"
+    # --------------------------------------------
+    # NEW: CLEANLINESS & PROCESSING INDEX
+    # --------------------------------------------
+    cleanliness_score = max(0, 100 - (medium * 5 + low * 3))
+    processing_severity = (critical * 3) + (high * 2) + medium
     score_lines = [
         "QUALITY ASSESSMENT",
         f"GRADE: {grade}",
         f"QUALITY: {quality}",
         "",
+        "RECOMMENDATION:",
+        f"{recommendation}",
+        "",
+        "CLEANLINESS SCORE:",
+        f"{cleanliness_score}/100",
+        "",
+        "PROCESSING SEVERITY INDEX:",
+        f"{processing_severity}",
+        "",
         "ISSUES SUMMARY",
         "─" * 28,
         f"🔴 Critical: {critical}",
         f"🟠 High:     {high}",
         f"🟡 Medium:   {medium}",
+        f"🟢 Low:      {low}",
         "",
         "─" * 28,
         "Generated:",
         fontweight="bold"
     )
     # SAVE REPORT
     plt.savefig(
         output_path,
         dpi=300,
     plt.close()
     return output_path
 # ============================================================
 # MAIN ANALYSIS FUNCTION (GRADIO CALLBACK)
 # ============================================================