Spaces:

Mr7Explorer
/

audio_analyzer

Sleeping

File size: 8,256 Bytes

6ad5106
 
 
cbc5199
 
ee0d393
ac95ccb
 
 
 
 
 
77bde25
d278c12
ea9bfe4
153e400
703a7e5
ea9bfe4
 
 
956259e
d278c12
ea9bfe4
 
 
956259e
ea9bfe4
 
d278c12
77bde25
42ea287
ea9bfe4
 
 
 
 
77bde25
d278c12
ea9bfe4
 
77bde25
d278c12
ea9bfe4
 
77bde25
 
6ad5106
956259e
77bde25
 
ea9bfe4
 
77bde25
 
ea9bfe4
 
77bde25
 
4f20d78
77bde25
 
 
d278c12
 
 
 
77bde25
d278c12
976efb4
77bde25
976efb4
d278c12
976efb4
8186a73
42ea287
d278c12
976efb4
8186a73
42ea287
d278c12
976efb4
8186a73
42ea287
4f20d78
976efb4
8186a73
42ea287
d278c12
976efb4
 
d278c12
 
 
976efb4
 
 
 
 
 
 
 
 
 
 
ea9bfe4
77bde25
ea9bfe4
 
6ad5106
ea9bfe4
 
6ad5106
 
d278c12
ea9bfe4
6ad5106
 
956259e
77bde25
 
42ea287
 
 
d278c12
ea9bfe4
956259e
77bde25
d278c12
42ea287
956259e
ea9bfe4
42ea287
6ad5106
42ea287
d278c12
ea9bfe4
 
d278c12
6ad5106
 
 
42ea287
 
 
 
77bde25
42ea287
 
 
 
77bde25
 
42ea287
 
 
 
 
d278c12
 
 
 
ea9bfe4
 
 
42ea287
ea9bfe4
 
42ea287
 
 
 
ea9bfe4
 
 
42ea287
ea9bfe4
42ea287
 
 
 
 
 
 
6ad5106
b8b02b5
956259e
42ea287
 
 
 
 
 
 
 
77bde25
42ea287
 
 
 
77bde25
 
 
6ad5106
 
 
ea9bfe4
6ad5106
956259e
ea9bfe4
42ea287
ea9bfe4
77bde25
ea9bfe4
42ea287
956259e
ea9bfe4
 
 
42ea287
d278c12
ea9bfe4
 
 
956259e
6ad5106
 
 
ea9bfe4
b8b02b5
 
77bde25
ea9bfe4
6ad5106
42ea287
77bde25
 
 
 
 
 
 
 
 
 
956259e
6ad5106
 
42ea287
 
ea9bfe4
42ea287
ea9bfe4
42ea287
ea9bfe4
77bde25
956259e
6ad5106
ea9bfe4

import gradio as gr
from pathlib import Path
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

import numpy as np
import librosa
import soundfile as sf
import matplotlib.pyplot as plt
import scipy.signal as sps

# Local Modules (must exist in repo root)
from io_utils import read_audio_info
from time_domain import compute_time_domain_stats
from spectral import compute_spectral_analysis
from loudness_calc import compute_loudness, LOUDNESS_AVAILABLE
from issue_detection import detect_audio_issues
from synthetic_detector import detect_synthetic_voice
from report_generator import create_report


# Grade bands for the overall score, checked from best to worst:
# (minimum score, grade, quality label, Matplotlib-safe colour, recommendation).
# The last band starts at 0 so every clamped score matches one band.
_GRADE_BANDS = (
    (90, "A", "EXCELLENT", "#b3ffb3", "Excellent for TTS dataset"),
    (75, "B", "GOOD", "#ccffcc", "Good quality; suitable for TTS"),
    (60, "C", "FAIR", "#fff6b3",
     "Fair; contains noticeable processing artifacts"),
    (40, "D", "POOR", "#ffd9b3", "Poor quality; not recommended for TTS"),
    (0, "F", "CRITICAL", "#ffb3b3",
     "Severely degraded or heavily processed audio"),
)

# Bullet icon shown next to each issue in the Markdown summary, by severity.
_SEVERITY_ICONS = {"CRITICAL": "🔴", "HIGH": "🟠", "MEDIUM": "🟡", "LOW": "🟢"}


def _score_issues(issues):
    """Build the score dictionary from the detected issues.

    ``issues`` is iterated as 3-tuples whose middle element is the severity
    string. Only CRITICAL, HIGH, MEDIUM and LOW are counted; any other
    severity is ignored.

    The headline score starts at 100 and loses 30 per CRITICAL, 15 per HIGH
    and 5 per MEDIUM issue, clamped at 0. LOW issues do not affect it; they
    only lower ``cleanliness_score``.

    Returns a dict with the keys ``score``, ``grade``, ``quality``,
    ``recommendation``, ``cleanliness_score``, ``processing_severity``,
    ``critical``, ``high``, ``medium``, ``low`` and ``color``.
    """
    # Single pass over the issues instead of one scan per severity level.
    tally = dict.fromkeys(("CRITICAL", "HIGH", "MEDIUM", "LOW"), 0)
    for _, severity, _ in issues:
        if severity in tally:
            tally[severity] += 1

    critical = tally["CRITICAL"]
    high = tally["HIGH"]
    medium = tally["MEDIUM"]
    low = tally["LOW"]

    score_value = max(0, 100 - critical * 30 - high * 15 - medium * 5)
    _, grade, quality, color, recommendation = next(
        band for band in _GRADE_BANDS if score_value >= band[0]
    )

    return {
        "score": score_value,
        "grade": grade,
        "quality": quality,
        "recommendation": recommendation,
        "cleanliness_score": max(0, 100 - (medium * 5 + low * 3)),
        "processing_severity": (critical * 3) + (high * 2) + medium,
        "critical": critical,
        "high": high,
        "medium": medium,
        "low": low,
        "color": color,
    }


def _build_markdown_summary(audio_data, report_name):
    """Render the Markdown summary shown next to the PNG report.

    ``audio_data`` is the payload assembled by ``analyze_audio`` (keys
    ``filename``, ``info``, ``time_stats``, ``spectral``, ``lufs``,
    ``issues``, ``score``, ``synthetic``, ``timestamp``). ``report_name`` is
    the file name of the saved PNG report.

    Lines ending in two spaces are Markdown hard line breaks. The spaces are
    kept inside the string literals so they cannot be lost to
    trailing-whitespace stripping.
    """
    info = audio_data["info"]
    time_stats = audio_data["time_stats"]
    spectral = audio_data["spectral"]
    lufs = audio_data["lufs"]
    issues = audio_data["issues"]
    score = audio_data["score"]
    synthetic = audio_data["synthetic"]
    bands = spectral["energy_distribution"]

    lines = [
        "",
        "# 🎵 Audio Forensic Summary Report",
        "",
        "## 📁 File Information",
        f"- **Name:** `{audio_data['filename']}`",
        f"- **Duration:** {info['duration']:.2f}s  ",
        f"- **Sample Rate:** {info['samplerate']} Hz  ",
        f"- **Channels:** {info['channels']}",
        "",
        "---",
        "",
        "## 🎚 Loudness (ITU-R BS.1770-3)",
    ]

    if lufs is not None:
        lines.append(f"- **Integrated LUFS:** {lufs:.2f} LUFS  ")
        # Compliance window is the -23 LUFS target with a +/-2 LU tolerance.
        if -25 <= lufs <= -21:
            lines.append("  - **Status:** PASS ✅ (Compliant −23 LUFS ±2)")
        else:
            lines.append(
                "  - **Status:** FAIL ❌ (Not compliant with −23 LUFS ±2)"
            )
    else:
        lines.append(
            "- **Integrated LUFS:** Not available (pyloudnorm missing)  "
        )

    lines += [
        "",
        "---",
        "",
        "## 🧪 Audio Quality Score",
        f"- **Score:** {score['score']}/100  ",
        f"- **Grade:** {score['grade']}  ",
        f"- **Quality:** {score['quality']}  ",
        f"- **Recommendation:** {score['recommendation']}",
        "",
        "---",
        "",
        "## 🔧 Time-Domain Characteristics",
        "| Metric | Value |",
        "|--------|--------|",
        f"| Peak Level | {time_stats['peak_db']:.2f} dBFS |",
        f"| RMS Level | {time_stats['rms_db']:.2f} dBFS |",
        f"| Crest Factor | {time_stats['crest_factor_db']:.2f} dB |",
        f"| Noise Floor | {time_stats['noise_floor']:.6f} |",
        f"| SNR | {time_stats['snr_db']:.1f} dB |",
        f"| ZCR | {time_stats['zero_crossing_rate']:.4f} |",
        "",
        "---",
        "",
        "## 🎛 Spectral Analysis",
        "| Parameter | Value |",
        "|----------|--------|",
        f"| Spectral Centroid | {spectral['spectral_centroid']:.1f} Hz |",
        f"| 85% Rolloff | {spectral['rolloff_85pct']:.1f} Hz |",
        f"| 95% Rolloff | {spectral['rolloff_95pct']:.1f} Hz |",
        f"| Highest Frequency (−60 dB) | {spectral['highest_freq_minus60db']:.1f} Hz |",
        "",
        "---",
        "",
        "## 🔊 Energy Distribution (Speech Frequency Bands)",
        "| Band | Energy % |",
        "|------|----------|",
        f"| <100 Hz | {bands['below_100hz']:.2f}% |",
        f"| 100–500 Hz | {bands['100_500hz']:.2f}% |",
        f"| 500–2k Hz | {bands['500_2khz']:.2f}% |",
        f"| 2k–8k Hz | {bands['2k_8khz']:.2f}% |",
        f"| 8k–12k Hz | {bands['8k_12khz']:.2f}% |",
        f"| 12k–16k Hz | {bands['12k_16khz']:.2f}% |",
        f"| >16k Hz | {bands['above_16khz']:.2f}% |",
        "",
        "---",
        "",
        "## 🤖 Synthetic Voice Estimate (Informational Only)",
        f"- **Probability:** {synthetic.get('synthetic_probability', 0.0):.3f}",
        f"- **Label:** {synthetic.get('synthetic_label', 'unknown')}",
        "",
        "---",
        "",
        f"## ⚠️ Issues Detected: {len(issues)}",
    ]

    if issues:
        lines.extend(
            f"- {_SEVERITY_ICONS.get(sev, '⚪')} **[{sev}] {issue}** — {desc}"
            for issue, sev, desc in issues
        )
    else:
        lines.append("- ✅ No issues detected.")

    lines += [
        "",
        "---",
        "",
        f"📊 **PNG Forensic Report Saved:** `{report_name}`  ",
        f"🕒 Generated: {audio_data['timestamp']}",
        "",
    ]
    return "\n".join(lines)


def analyze_audio(audio_file, progress=gr.Progress()):
    """Run the full analysis pipeline on one uploaded audio file.

    Args:
        audio_file: Path of the uploaded file, or ``None``/empty when nothing
            was uploaded.
        progress: Gradio progress tracker, called with a fraction and a
            description before each stage.

    Returns:
        A ``(png_path, markdown)`` tuple. ``png_path`` is the path of the PNG
        report written to ``reports/<stem>_report.png``, or ``None`` when no
        file was given or the analysis failed; ``markdown`` is the summary,
        a prompt to upload a file, or the failure message.

    Any exception raised during analysis is printed with its traceback and
    reported in the returned Markdown instead of propagating.
    """
    # Also covers an empty path, which would otherwise fail further down.
    if not audio_file:
        return None, "⚠️ Please upload an audio file."

    try:
        path = Path(audio_file)

        # File info & load
        progress(0.10, desc="Reading file...")
        info = read_audio_info(str(path))

        progress(0.25, desc="Loading waveform...")
        # sr=None keeps the file's native sample rate; mono=True downmixes.
        y, sr = librosa.load(str(path), sr=None, mono=True)

        # Time-domain
        progress(0.35, desc="Time-domain analysis...")
        time_stats = compute_time_domain_stats(y)

        # Spectral
        progress(0.50, desc="Spectral analysis...")
        spectral = compute_spectral_analysis(y, sr)

        # Loudness (skipped when the loudness backend is unavailable)
        progress(0.60, desc="Computing LUFS...")
        lufs = compute_loudness(y, sr) if LOUDNESS_AVAILABLE else None

        # Issue detection
        progress(0.70, desc="Detecting issues...")
        issues = detect_audio_issues(spectral, time_stats)

        # Synthetic detection (informational)
        progress(0.78, desc="Synthetic voice estimation...")
        synthetic = detect_synthetic_voice(y, sr, spectral)

        # Scoring
        progress(0.82, desc="Scoring...")
        score_dict = _score_issues(issues)

        # Payload shared by the PNG report and the Markdown summary.
        audio_data = {
            "filename": path.name,
            "info": info,
            "time_stats": time_stats,
            "spectral": spectral,
            "lufs": lufs,
            "issues": issues,
            "score": score_dict,
            "synthetic": synthetic,
            "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        }

        # Render the PNG report into ./reports (created on first use).
        progress(0.92, desc="Rendering PNG report...")
        report_dir = Path("reports")
        report_dir.mkdir(exist_ok=True)
        output_file = report_dir / f"{path.stem}_report.png"

        create_report(audio_data, str(output_file))

        md = _build_markdown_summary(audio_data, output_file.name)
        return str(output_file), md

    except Exception as exc:
        # UI boundary: log the traceback and show the error to the user
        # rather than crashing the event handler.
        import traceback
        traceback.print_exc()
        return None, f"# ❌ Analysis Failed\n{exc}"


# Gradio UI
#
# Page layout: intro text on top, then a row with the upload/trigger column
# (1/3 width) beside the PNG report column (2/3 width), and the Markdown
# summary underneath.

# Intro text rendered as Markdown at the top of the page.
_INTRO_MARKDOWN = """
# 🎧 AUDIO FORENSIC ANALYZER
Upload an audio file to generate a forensic-quality report:
- HF/LF rolloff detection
- LPF/HPF / Brickwall detection
- Noise-reduction artifacts
- Compression and clipping indicators
- Spectral notches
- LUFS (ITU-R BS.1770-3) check
- Synthetic voice estimation (informational)
Outputs a PNG report + Markdown summary
    """

with gr.Blocks(title="Audio Forensic Analyzer", theme="soft") as demo:
    gr.Markdown(_INTRO_MARKDOWN)

    with gr.Row():
        # Input side: the upload widget hands the handler a file path.
        with gr.Column(scale=1):
            audio_in = gr.Audio(
                label="📁 Upload Audio",
                type="filepath",
            )
            analyze_btn = gr.Button(
                "🔍 Analyze Audio",
                variant="primary",
            )
        # Output side: the rendered PNG report.
        with gr.Column(scale=2):
            png_out = gr.Image(
                label="📊 Forensic PNG Report",
                type="filepath",
                height=600,
            )

    summary_out = gr.Markdown(label="📋 Summary Report")

    # The handler returns (png_path, markdown), matching the outputs order.
    analyze_btn.click(
        fn=analyze_audio,
        inputs=[audio_in],
        outputs=[png_out, summary_out],
    )

if __name__ == "__main__":
    # Listen on all interfaces on port 7860 and surface handler errors in the UI.
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        show_error=True,
    )