Spaces:

Mr7Explorer
/

audio_analyzer

Sleeping

App Files Files Community

Mr7Explorer committed on Dec 10, 2025

Commit

ea9bfe4

verified ·

1 Parent(s): 7d4358f

Update app.py

Browse files

Files changed (1) hide show

app.py +165 -441

app.py CHANGED Viewed

@@ -1,497 +1,221 @@
-# ============================================================
-# AUDIO FORENSIC ANALYZER — FINAL VERSION WITH SYNTHETIC DETECTOR
-# ============================================================
 import gradio as gr
-import sys
 from pathlib import Path
 from datetime import datetime
-import warnings
-warnings.filterwarnings('ignore')
-import numpy as np
-import soundfile as sf
-import librosa
-import librosa.display
-import matplotlib.pyplot as plt
-import matplotlib.gridspec as gridspec
-import scipy.signal as sps
-try:
-    import pyloudnorm as pyln
-    LOUDNESS_AVAILABLE = True
-except ImportError:
-    LOUDNESS_AVAILABLE = False
-# ============================================================
-# READ AUDIO INFO
-# ============================================================
-def read_audio_info(path):
-    info = sf.info(path)
-    return {
-        "samplerate": int(info.samplerate),
-        "channels": int(info.channels),
-        "frames": int(info.frames),
-        "subtype": info.subtype,
-        "format": info.format,
-        "duration": float(info.frames) / info.samplerate if info.frames else 0.0
-    }
-# ============================================================
-# TIME-DOMAIN STATS
-# ============================================================
-def compute_time_domain_stats(y):
-    peak = float(np.max(np.abs(y)))
-    rms = float(np.sqrt(np.mean(y ** 2)))
-    peak_db = 20 * np.log10(max(peak, 1e-12))
-    rms_db = 20 * np.log10(max(rms, 1e-12))
-    crest_factor = peak_db - rms_db
-    abs_y = np.abs(y)
-    noise_floor = float(np.percentile(abs_y, 10))
-    snr_est = 20 * np.log10(max(rms, 1e-12) / max(noise_floor, 1e-12))
-    zcr = float(np.mean(librosa.feature.zero_crossing_rate(y)))
-    return {
-        "peak": peak,
-        "rms": rms,
-        "peak_db": peak_db,
-        "rms_db": rms_db,
-        "crest_factor_db": crest_factor,
-        "noise_floor": noise_floor,
-        "snr_db": snr_est,
-        "zero_crossing_rate": zcr
-    }
-# ============================================================
-# SPECTRAL ANALYSIS
-# ============================================================
-def compute_spectral_analysis(y, sr, n_fft=4096):
-    hop = n_fft // 4
-    S = np.abs(librosa.stft(y, n_fft=n_fft, hop_length=hop, window="hann"))
-    freqs = np.linspace(0, sr / 2, S.shape[0])
-    S_db = librosa.amplitude_to_db(S, ref=np.max)
-    S_power = S ** 2
-    energy = np.percentile(S_power, 90, axis=1) + 1e-20
-    total_energy = float(np.sum(energy))
-    cum_energy = np.cumsum(energy)
-    idx85 = np.searchsorted(cum_energy, 0.85 * total_energy)
-    idx95 = np.searchsorted(cum_energy, 0.95 * total_energy)
-    freq85 = float(freqs[min(idx85, len(freqs)-1)])
-    freq95 = float(freqs[min(idx95, len(freqs)-1)])
-    mean_db = np.percentile(S_db, 90, axis=1)
-    pk = float(np.max(S_db))
-    thr = pk - 60
-    bins = np.where(mean_db > thr)[0]
-    highest_freq = float(freqs[bins[-1]]) if len(bins) else 0.0
-    def band(low, high):
-        i1 = np.searchsorted(freqs, low)
-        i2 = np.searchsorted(freqs, high)
-        return float(100 * np.sum(energy[i1:i2]) / total_energy)
-    def band_above(f):
-        idx = np.searchsorted(freqs, f)
-        return float(100 * np.sum(energy[idx:]) / total_energy)
-    energy_stats = {
-        "below_100hz": band(0, 100),
-        "100_500hz": band(100, 500),
-        "500_2khz": band(500, 2000),
-        "2k_8khz": band(2000, 8000),
-        "8k_12khz": band(8000, 12000),
-        "12k_16khz": band(12000, 16000),
-        "above_16khz": band_above(16000)
-    }
-    diffs = np.diff(mean_db)
-    bw_idx = np.where(diffs < -20)[0]
-    brick = bool(len(bw_idx))
-    brick_freq = float(freqs[bw_idx[0]]) if len(bw_idx) else None
-    smooth = sps.medfilt(mean_db, kernel_size=9)
-    minima = sps.argrelextrema(smooth, np.less)[0]
-    notches = []
-    for m in minima:
-        left = smooth[max(0, m - 6):m]
-        right = smooth[m+1:min(len(smooth), m+7)]
-        neigh = max(left.max() if len(left) else -999,
-                    right.max() if len(right) else -999)
-        depth = neigh - smooth[m]
-        if depth >= 15 and freqs[m] > 100:
-            notches.append({"freq": float(freqs[m]), "depth_db": float(depth)})
-    centroid = float(np.mean(librosa.feature.spectral_centroid(S=S, sr=sr)))
-    bandwidth = float(np.mean(librosa.feature.spectral_bandwidth(S=S, sr=sr)))
-    flatness = float(np.mean(librosa.feature.spectral_flatness(S=S)))
-    rolloff = float(np.mean(librosa.feature.spectral_rolloff(S=S, sr=sr)))
-    return {
-        "S_db": S_db,
-        "freqs": freqs,
-        "hop_length": hop,
-        "rolloff_85pct": freq85,
-        "rolloff_95pct": freq95,
-        "highest_freq_minus60db": highest_freq,
-        "energy_distribution": energy_stats,
-        "brick_wall_detected": brick,
-        "brick_wall_freq": brick_freq,
-        "spectral_notches": notches,
-        "spectral_centroid": centroid,
-        "spectral_bandwidth": bandwidth,
-        "spectral_flatness": flatness,
-        "spectral_rolloff": rolloff
-    }
-# ============================================================
-# SYNTHETIC VOICE DETECTOR (LIGHTWEIGHT)
-# ============================================================
-def detect_synthetic_voice(y, sr, spectral):
-    try:
-        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40)
-        mfcc_std = np.mean(np.std(mfcc, axis=1))
-        f0 = librosa.yin(y, 50, 400, sr=sr)
-        jitter = np.std(np.diff(f0) / (np.mean(f0) + 1e-6))
-        energy = spectral["energy_distribution"]
-        sym = abs(energy["8k_12khz"] - energy["12k_16khz"])
-        cs = []
-        for i in range(mfcc.shape[1] - 1):
-            v1 = mfcc[:, i]
-            v2 = mfcc[:, i+1]
-            cs.append(np.dot(v1, v2) /
-                      (np.linalg.norm(v1) * np.linalg.norm(v2) + 1e-8))
-        cos_sim = float(np.mean(cs))
-        score = (
-            1.2 * (cos_sim - 0.85) +
-            0.8 * (0.15 - mfcc_std) +
-            1.0 * (0.02 - jitter) +
-            0.5 * (0.10 - sym)
-        )
-        prob = 1 / (1 + np.exp(-5 * score))
-        prob = float(np.clip(prob, 0, 1))
-        label = "AI" if prob > 0.5 else "Human"
-        return prob, label
-    except:
-        return 0.0, "Human"
-# ============================================================
-# ISSUE DETECTION (Your original logic preserved)
-# ============================================================
-def detect_audio_issues(spectral, time_stats):
-    issues = []
-    energy = spectral["energy_distribution"]
-    freqs = spectral["freqs"]
-    flatness = spectral["spectral_flatness"]
-    notches = spectral["spectral_notches"]
-    hf_8_12 = energy["8k_12khz"]
-    highf = spectral["highest_freq_minus60db"]
-    if hf_8_12 < 0.01 and highf < 9000:
-        issues.append(("HF_LOSS", "HIGH", f"Severe HF cutoff"))
-    elif hf_8_12 < 0.02:
-        issues.append(("HF_LOSS", "LOW", "Low HF energy"))
-    if spectral["brick_wall_detected"]:
-        issues.append(("BRICK_WALL", "HIGH",
-                       f"Brick-wall at {spectral['brick_wall_freq']:.0f} Hz"))
-    if flatness > 0.40 and len(notches) >= 3:
-        issues.append(("NOISE_REDUCTION_ARTIFACTS", "HIGH", "NR artifacts"))
-    elif flatness > 0.35:
-        issues.append(("NR_SOFT", "LOW", "Mild noise reduction"))
-    if len(notches):
-        issues.append(("SPECTRAL_NOTCHES", "MEDIUM",
-                       f"{len(notches)} notches detected"))
-    crest = time_stats["crest_factor_db"]
-    if crest < 3:
-        issues.append(("OVER_COMPRESSION", "HIGH",
-                       f"Crest {crest:.1f} dB"))
-    elif crest < 6:
-        issues.append(("COMPRESSION", "MEDIUM",
-                       f"Crest {crest:.1f} dB"))
-    if time_stats["peak"] >= 0.999:
-        issues.append(("CLIPPING", "CRITICAL",
-                       "Probable clipping"))
-    return issues
-# ============================================================
-# REPORT GENERATION (PNG)
-# ============================================================
-def create_report(data, outpath):
-    plt.style.use("default")
-    fig = plt.figure(figsize=(22, 16))
-    fig.patch.set_facecolor("white")
-    fig.suptitle(
-        f"AUDIO FORENSIC ANALYSIS REPORT\n{data['filename']}",
-        fontsize=20, fontweight="bold", y=0.97
-    )
-    gs = gridspec.GridSpec(
-        4, 4, figure=fig,
-        hspace=0.5, wspace=0.4,
-        height_ratios=[1.6, 1, 1, 1]
-    )
-    # Spectrogram
-    ax = fig.add_subplot(gs[0, :])
-    S_db = data["spectral"]["S_db"]
-    sr = data["info"]["samplerate"]
-    hop = data["spectral"]["hop_length"]
-    img = librosa.display.specshow(
-        S_db, sr=sr, hop_length=hop,
-        x_axis="time", y_axis="hz",
-        cmap="viridis", ax=ax, vmin=-80, vmax=0
-    )
-    ax.set_title("Spectrogram", fontsize=14)
-    plt.colorbar(img, ax=ax)
-    # File info block
-    ax2 = fig.add_subplot(gs[1, 0:2])
-    ax2.axis("off")
-    info = data["info"]
-    t = data["time_stats"]
-    block = [
-        "FILE INFORMATION",
-        f"Sample Rate: {info['samplerate']}",
-        f"Channels: {info['channels']}",
-        f"Duration: {info['duration']:.2f} sec",
-        "",
-        "TIME-DOMAIN",
-        f"Peak: {t['peak_db']:.2f} dBFS",
-        f"RMS: {t['rms_db']:.2f} dBFS",
-        f"Crest: {t['crest_factor_db']:.2f} dB",
-        f"SNR: {t['snr_db']:.1f} dB",
-        f"Zero-Cross: {t['zero_crossing_rate']:.4f}",
-    ]
-    if data["lufs"] is not None:
-        block.append(f"Integrated LUFS: {data['lufs']:.2f}")
-    ax2.text(0.02, 0.98, "\n".join(block), va="top",
-             fontsize=11, family="monospace",
-             bbox=dict(boxstyle="round", fc="#E8F4F8", ec="#0077BE"))
-    # Spectral stats
-    ax3 = fig.add_subplot(gs[1, 2:4])
-    ax3.axis("off")
-    sp = data["spectral"]
-    ed = sp["energy_distribution"]
-    block2 = [
-        "SPECTRAL ANALYSIS",
-        f"Centroid: {sp['spectral_centroid']:.1f}",
-        f"Bandwidth: {sp['spectral_bandwidth']:.1f}",
-        f"Flatness: {sp['spectral_flatness']:.4f}",
-        f"Rolloff 85%: {sp['rolloff_85pct']:.1f}",
-        f"Rolloff 95%: {sp['rolloff_95pct']:.1f}",
-        f"Highest -60dB: {sp['highest_freq_minus60db']:.1f}",
-        "",
-        "ENERGY DISTRIBUTION",
-        *(f"{k}: {v:.2f}%" for k, v in ed.items())
-    ]
-    ax3.text(0.02, 0.98, "\n".join(block2), va="top",
-             fontsize=11, family="monospace",
-             bbox=dict(boxstyle="round", fc="#FFF4E6", ec="#FF8C00"))
-    # Issues
-    ax4 = fig.add_subplot(gs[2, :])
-    ax4.axis("off")
-    issues = data["issues"]
-    lines = ["DETECTED ISSUES", ""]
-    if not issues:
-        lines.append("No major issues detected.")
-    else:
-        for typ, sev, desc in issues:
-            lines.append(f"[{sev}] {typ} → {desc}")
-    if sp["spectral_notches"]:
-        lines.append("")
-        lines.append(f"Spectral Notches: {len(sp['spectral_notches'])}")
-    ax4.text(0.02, 0.98, "\n".join(lines), fontsize=11,
-             va="top", family="monospace",
-             bbox=dict(boxstyle="round", fc="#FFE6E6", ec="#DC143C"))
-    # Quality score + synthetic
-    ax5 = fig.add_subplot(gs[3, :])
-    ax5.axis("off")
-    crit = sum(1 for _, s, _ in issues if s == "CRITICAL")
-    hi = sum(1 for _, s, _ in issues if s == "HIGH")
-    med = sum(1 for _, s, _ in issues if s == "MEDIUM")
-    low = sum(1 for _, s, _ in issues if s == "LOW")
-    score = 100 - (crit * 35 + hi * 20 + med * 8 + low * 3)
-    score = np.clip(score, 0, 100)
-    prob = data["synthetic_prob"]
-    label = data["synthetic_label"]
-    block3 = [
-        "QUALITY & SYNTHETIC ANALYSIS",
-        f"Score: {score:.1f}/100",
-        f"Issues → C:{crit}, H:{hi}, M:{med}, L:{low}",
-        "",
-        "SYNTHETIC DETECTOR",
-        f"Probability: {prob:.2f}",
-        f"Label: {label}",
-        "",
-        f"Generated: {data['timestamp']}"
-    ]
-    ax5.text(0.5, 0.5, "\n".join(block3),
-             fontsize=11, ha="center", va="center",
-             family="monospace",
-             bbox=dict(boxstyle="round", fc="#DFFFD8", ec="black"))
-    plt.savefig(outpath, dpi=300, bbox_inches="tight")
-    plt.close()
-    return outpath
 # ============================================================
 # MAIN ANALYSIS FUNCTION
 # ============================================================
-def analyze_audio(file, progress=gr.Progress()):
-    if file is None:
-        return None, "Please upload an audio file."
-    try:
-        progress(0.1)
-        p = Path(file)
-        info = read_audio_info(str(p))
-        y, sr = librosa.load(str(p), sr=None, mono=True)
-        progress(0.3)
-        tstats = compute_time_domain_stats(y)
-        progress(0.5)
-        spec = compute_spectral_analysis(y, sr)
-        progress(0.6)
         lufs = compute_loudness(y, sr) if LOUDNESS_AVAILABLE else None
-        progress(0.7)
-        issues = detect_audio_issues(spec, tstats)
-        progress(0.75)
-        prob, label = detect_synthetic_voice(y, sr, spec)
-        data = {
-            "filename": p.name,
             "info": info,
-            "time_stats": tstats,
-            "spectral": spec,
             "lufs": lufs,
             "issues": issues,
-            "synthetic_prob": prob,
-            "synthetic_label": label,
             "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
         }
-        outdir = Path("reports")
-        outdir.mkdir(exist_ok=True)
-        outpng = outdir / f"{p.stem}_report.png"
-        progress(0.9)
-        create_report(data, str(outpng))
-        progress(1.0)
-        summary = f"""
-# 🎧 Audio Forensic Analyzer
-## File: `{p.name}`
-### **Synthetic Detector**
-- Probability: **{prob:.2f}**
-- Label: **{label}**
 ---
-### **Quality Metrics**
-- Peak: {tstats['peak_db']:.2f} dBFS
-- RMS: {tstats['rms_db']:.2f} dBFS
-- Crest Factor: {tstats['crest_factor_db']:.2f} dB
-- SNR: {tstats['snr_db']:.1f} dB
 ---
-### **Spectral**
-- Centroid: {spec['spectral_centroid']:.1f} Hz
-- Rolloff 85%: {spec['rolloff_85pct']:.1f} Hz
-- Highest -60 dB: {spec['highest_freq_minus60db']:.1f} Hz
 ---
-### **Issues Detected:** {len(issues)}
 """
-        for typ, sev, desc in issues:
-            summary += f"- **[{sev}] {typ}** → {desc}\n"
-        summary += f"\n---\n📊 **Report saved as:** `{outpng.name}`"
-        return str(outpng), summary
     except Exception as e:
         import traceback
         traceback.print_exc()
-        return None, f"Error: {e}"
 # ============================================================
-# UI
 # ============================================================
-with gr.Blocks(title="Audio Forensic Analyzer") as demo:
     gr.Markdown("""
-    # 🔍 Audio Forensic Analyzer
-    Upload an audio file to generate a complete forensic report.
-    **Now includes a lightweight AI-vs-Human synthetic detector (informational only).**
-    """)
     with gr.Row():
         with gr.Column(scale=1):
-            inp = gr.Audio(label="Upload Audio", type="filepath")
-            btn = gr.Button("Analyze", variant="primary")
-        with gr.Column(scale=2):
-            img = gr.Image(label="Report", type="filepath", height=600)
-    summary = gr.Markdown()
-    btn.click(analyze_audio, inputs=inp, outputs=[img, summary])
 if __name__ == "__main__":
-    demo.launch()

 import gradio as gr
+import librosa
+import soundfile as sf
 from pathlib import Path
 from datetime import datetime
+# -------------------------------
+# Import internal modules
+# -------------------------------
+from read_audio_info import read_audio_info
+from time_domain import compute_time_domain_stats
+from spectral import compute_spectral_analysis
+from loudness_calc import compute_loudness, LOUDNESS_AVAILABLE
+from issue_detection import detect_audio_issues
+from synthetic_detector import detect_synthetic_voice
+from scoring import compute_quality_score
+from report_generator import create_report
 # ============================================================
 # MAIN ANALYSIS FUNCTION
 # ============================================================
+def analyze_audio(audio_file, progress=gr.Progress()):
+    """Main Gradio callback — performs full forensic analysis."""
+    if audio_file is None:
+        return None, "⚠️ Please upload an audio file."
+    try:
+        progress(0.1, desc="Reading audio file...")
+        path = Path(audio_file)
+        info = read_audio_info(str(path))
+        progress(0.25, desc="Loading waveform...")
+        y, sr = librosa.load(str(path), sr=None, mono=True)
+        # ======================================================
+        # TIME DOMAIN
+        # ======================================================
+        progress(0.35, desc="Analyzing time-domain...")
+        time_stats = compute_time_domain_stats(y)
+        # ======================================================
+        # SPECTRAL
+        # ======================================================
+        progress(0.50, desc="Computing spectral analysis...")
+        spectral = compute_spectral_analysis(y, sr)
+        # ======================================================
+        # LOUDNESS
+        # ======================================================
+        progress(0.60, desc="Computing loudness...")
         lufs = compute_loudness(y, sr) if LOUDNESS_AVAILABLE else None
+        # ======================================================
+        # ISSUES
+        # ======================================================
+        progress(0.70, desc="Detecting issues...")
+        issues = detect_audio_issues(spectral, time_stats)
+        # ======================================================
+        # SYNTHETIC DETECTION (informational only)
+        # ======================================================
+        progress(0.78, desc="Synthetic voice estimation...")
+        synthetic = detect_synthetic_voice(y, sr, spectral)
+        # ======================================================
+        # SCORING
+        # ======================================================
+        progress(0.82, desc="Scoring...")
+        score = compute_quality_score(issues)
+        # ======================================================
+        # CREATE REPORT PNG
+        # ======================================================
+        output_dir = Path("reports")
+        output_dir.mkdir(exist_ok=True)
+        output_file = output_dir / (path.stem + "_report.png")
+        audio_data = {
+            "filename": path.name,
             "info": info,
+            "time_stats": time_stats,
+            "spectral": spectral,
             "lufs": lufs,
             "issues": issues,
+            "score": score,
+            "synthetic": synthetic,
             "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
         }
+        progress(0.92, desc="Rendering PNG report...")
+        create_report(audio_data, str(output_file))
+        # ======================================================
+        # SUMMARY MARKDOWN
+        # ======================================================
+        progress(1.0, desc="Done!")
+        md = f"""
+# 🎵 Analysis Complete — Audio Forensic Analyzer
+## File Information
+- **Filename:** `{audio_data['filename']}`
+- **Duration:** {info['duration']:.2f}s
+- **Sample Rate:** {info['samplerate']} Hz
+- **Channels:** {info['channels']}
 ---
+## 🔍 Quality Assessment
+- **Score:** {score['score']}/100
+- **Grade:** {score['grade']}
+- **Quality:** {score['quality']}
+- **Recommendation:** {score['recommendation']}
+---
+## 🔧 Time-Domain Stats
+| Metric | Value |
+|--------|--------|
+| Peak Level | {time_stats['peak_db']:.2f} dBFS |
+| RMS Level | {time_stats['rms_db']:.2f} dBFS |
+| Crest Factor | {time_stats['crest_factor_db']:.2f} dB |
+| SNR | {time_stats['snr_db']:.1f} dB |
+| ZCR | {time_stats['zero_crossing_rate']:.4f} |
+"""
+        if lufs is not None:
+            md += f"| Integrated LUFS | {lufs:.2f} LUFS |\n"
+        md += f"""
 ---
+## 🎚 Synthetic Voice Estimate (Informational Only)
+- **Probability:** {synthetic['synthetic_probability']:.2f}
+- **Label:** **{synthetic['synthetic_label']}**
 ---
+## ⚠️ Issues Detected: {len(issues)}
 """
+        if issues:
+            icons = {"CRITICAL": "🔴", "HIGH": "🟠", "MEDIUM": "🟡", "LOW": "🟢"}
+            for issue, sev, desc in issues:
+                md += f"- {icons.get(sev,'⚪')} **[{sev}] {issue}** — {desc}\n"
+        else:
+            md += "- ✅ No significant issues\n"
+        md += f"""
+---
+📊 **Report PNG saved:** `{output_file.name}`
+"""
+        return str(output_file), md
     except Exception as e:
         import traceback
         traceback.print_exc()
+        return None, f"# ❌ Analysis Failed\n{str(e)}"
 # ============================================================
+# GRADIO UI
 # ============================================================
+with gr.Blocks(title="Audio Forensic Analyzer", theme="soft") as demo:
     gr.Markdown("""
+# 🎧 Audio Forensic Analyzer
+Upload an audio file and generate a **full forensic report**:
+- HF/LF rolloff
+- Filtering (LPF/HPF/Brickwall)
+- Compression & clipping
+- Noise reduction artifacts
+- Spectral notches
+- Loudness (LUFS)
+- Synthetic Voice Probability (Informational Only)
+Report includes a PNG + formatted summary.
+""")
     with gr.Row():
         with gr.Column(scale=1):
+            audio_input = gr.Audio(
+                label="📁 Upload Audio",
+                type="filepath",
+                sources=["upload"]
+            )
+            analyze_button = gr.Button("🔍 Analyze Audio", variant="primary")
+        with gr.Column(scale=2):
+            png_output = gr.Image(
+                label="📊 Forensic Report (PNG)",
+                type="filepath",
+                height=600
+            )
+    summary_output = gr.Markdown(label="📋 Summary Report")
+    analyze_button.click(
+        fn=analyze_audio,
+        inputs=[audio_input],
+        outputs=[png_output, summary_output]
+    )
+# Run in HuggingFace Space
 if __name__ == "__main__":
+    demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True)