File size: 8,256 Bytes
6ad5106
 
 
cbc5199
 
ee0d393
ac95ccb
 
 
 
 
 
77bde25
d278c12
ea9bfe4
153e400
703a7e5
ea9bfe4
 
 
956259e
d278c12
ea9bfe4
 
 
956259e
ea9bfe4
 
d278c12
77bde25
42ea287
ea9bfe4
 
 
 
 
77bde25
d278c12
ea9bfe4
 
77bde25
d278c12
ea9bfe4
 
77bde25
 
6ad5106
956259e
77bde25
 
ea9bfe4
 
77bde25
 
ea9bfe4
 
77bde25
 
4f20d78
77bde25
 
 
d278c12
 
 
 
77bde25
d278c12
976efb4
77bde25
976efb4
d278c12
976efb4
8186a73
42ea287
d278c12
976efb4
8186a73
42ea287
d278c12
976efb4
8186a73
42ea287
4f20d78
976efb4
8186a73
42ea287
d278c12
976efb4
 
d278c12
 
 
976efb4
 
 
 
 
 
 
 
 
 
 
ea9bfe4
77bde25
ea9bfe4
 
6ad5106
ea9bfe4
 
6ad5106
 
d278c12
ea9bfe4
6ad5106
 
956259e
77bde25
 
42ea287
 
 
d278c12
ea9bfe4
956259e
77bde25
d278c12
42ea287
956259e
ea9bfe4
42ea287
6ad5106
42ea287
d278c12
ea9bfe4
 
d278c12
6ad5106
 
 
42ea287
 
 
 
77bde25
42ea287
 
 
 
77bde25
 
42ea287
 
 
 
 
d278c12
 
 
 
ea9bfe4
 
 
42ea287
ea9bfe4
 
42ea287
 
 
 
ea9bfe4
 
 
42ea287
ea9bfe4
42ea287
 
 
 
 
 
 
6ad5106
b8b02b5
956259e
42ea287
 
 
 
 
 
 
 
77bde25
42ea287
 
 
 
77bde25
 
 
6ad5106
 
 
ea9bfe4
6ad5106
956259e
ea9bfe4
42ea287
ea9bfe4
77bde25
ea9bfe4
42ea287
956259e
ea9bfe4
 
 
42ea287
d278c12
ea9bfe4
 
 
956259e
6ad5106
 
 
ea9bfe4
b8b02b5
 
77bde25
ea9bfe4
6ad5106
42ea287
77bde25
 
 
 
 
 
 
 
 
 
956259e
6ad5106
 
42ea287
 
ea9bfe4
42ea287
ea9bfe4
42ea287
ea9bfe4
77bde25
956259e
6ad5106
ea9bfe4
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
import gradio as gr
from pathlib import Path
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

import numpy as np
import librosa
import soundfile as sf
import matplotlib.pyplot as plt
import scipy.signal as sps

# Local Modules (must exist in repo root)
from io_utils import read_audio_info
from time_domain import compute_time_domain_stats
from spectral import compute_spectral_analysis
from loudness_calc import compute_loudness, LOUDNESS_AVAILABLE
from issue_detection import detect_audio_issues
from synthetic_detector import detect_synthetic_voice
from report_generator import create_report


# Grade bands, checked from the highest threshold down. Each entry is
# (minimum score, grade, quality label, Matplotlib-safe hex color, recommendation).
_GRADE_BANDS = (
    (90, "A", "EXCELLENT", "#b3ffb3", "Excellent for TTS dataset"),
    (75, "B", "GOOD", "#ccffcc", "Good quality; suitable for TTS"),
    (60, "C", "FAIR", "#fff6b3", "Fair; contains noticeable processing artifacts"),
    (40, "D", "POOR", "#ffd9b3", "Poor quality; not recommended for TTS"),
    (0, "F", "CRITICAL", "#ffb3b3", "Severely degraded or heavily processed audio"),
)

# Bullet icon shown in the Markdown summary for each known severity label.
_SEVERITY_ICONS = {"CRITICAL": "🔴", "HIGH": "🟠", "MEDIUM": "🟡", "LOW": "🟢"}


def _score_issues(issues):
    """Turn the detected issues into the score dictionary used by the reports.

    Args:
        issues: Iterable of ``(name, severity, description)`` triples. Only
            the severities ``"CRITICAL"``, ``"HIGH"``, ``"MEDIUM"`` and
            ``"LOW"`` are counted; any other label is ignored.

    Returns:
        dict with the keys ``score``, ``grade``, ``quality``,
        ``recommendation``, ``cleanliness_score``, ``processing_severity``,
        ``critical``, ``high``, ``medium``, ``low`` and ``color``.
    """
    counts = {"CRITICAL": 0, "HIGH": 0, "MEDIUM": 0, "LOW": 0}
    for _, severity, _ in issues:
        if severity in counts:
            counts[severity] += 1

    critical = counts["CRITICAL"]
    high = counts["HIGH"]
    medium = counts["MEDIUM"]
    low = counts["LOW"]

    # LOW issues do not lower the headline score; they only affect the
    # cleanliness score below.
    score_value = max(0, 100 - (critical * 30) - (high * 15) - (medium * 5))

    # The score is clamped to >= 0, so the final band (threshold 0) always
    # matches and the loop never falls through without assigning.
    for threshold, grade, quality, color, recommendation in _GRADE_BANDS:
        if score_value >= threshold:
            break

    return {
        "score": score_value,
        "grade": grade,
        "quality": quality,
        "recommendation": recommendation,
        "cleanliness_score": max(0, 100 - (medium * 5 + low * 3)),
        "processing_severity": (critical * 3) + (high * 2) + medium,
        "critical": critical,
        "high": high,
        "medium": medium,
        "low": low,
        "color": color,
    }


def _render_summary(audio_data, report_name):
    """Build the Markdown summary shown next to the PNG report.

    Args:
        audio_data: The payload assembled by ``analyze_audio``. The keys
            read here are ``filename``, ``info``, ``time_stats``,
            ``spectral``, ``lufs``, ``issues``, ``score``, ``synthetic``
            and ``timestamp``.
        report_name: File name of the rendered PNG report.

    Returns:
        The Markdown text as a single string.
    """
    info = audio_data["info"]
    time_stats = audio_data["time_stats"]
    spectral = audio_data["spectral"]
    energy = spectral["energy_distribution"]
    lufs = audio_data["lufs"]
    issues = audio_data["issues"]
    score = audio_data["score"]
    synthetic = audio_data["synthetic"]

    # Every entry becomes one output line. A trailing double space inside a
    # literal is a deliberate Markdown hard line break; it is kept inside the
    # quotes so that whitespace-stripping editors cannot remove it.
    lines = [
        "",
        "# 🎵 Audio Forensic Summary Report",
        "",
        "## 📁 File Information",
        f"- **Name:** `{audio_data['filename']}`",
        f"- **Duration:** {info['duration']:.2f}s  ",
        f"- **Sample Rate:** {info['samplerate']} Hz  ",
        f"- **Channels:** {info['channels']}",
        "",
        "---",
        "",
        "## 🎚 Loudness (ITU-R BS.1770-3)",
    ]

    if lufs is not None:
        lines.append(f"- **Integrated LUFS:** {lufs:.2f} LUFS  ")
        # Compliance window is the -23 LUFS target with a +/-2 LU tolerance.
        if -25 <= lufs <= -21:
            lines.append("  - **Status:** PASS ✅ (Compliant −23 LUFS ±2)")
        else:
            lines.append("  - **Status:** FAIL ❌ (Not compliant with −23 LUFS ±2)")
    else:
        lines.append("- **Integrated LUFS:** Not available (pyloudnorm missing)  ")

    lines += [
        "",
        "---",
        "",
        "## 🧪 Audio Quality Score",
        f"- **Score:** {score['score']}/100  ",
        f"- **Grade:** {score['grade']}  ",
        f"- **Quality:** {score['quality']}  ",
        f"- **Recommendation:** {score['recommendation']}",
        "",
        "---",
        "",
        "## 🔧 Time-Domain Characteristics",
        "| Metric | Value |",
        "|--------|--------|",
        f"| Peak Level | {time_stats['peak_db']:.2f} dBFS |",
        f"| RMS Level | {time_stats['rms_db']:.2f} dBFS |",
        f"| Crest Factor | {time_stats['crest_factor_db']:.2f} dB |",
        f"| Noise Floor | {time_stats['noise_floor']:.6f} |",
        f"| SNR | {time_stats['snr_db']:.1f} dB |",
        f"| ZCR | {time_stats['zero_crossing_rate']:.4f} |",
        "",
        "---",
        "",
        "## 🎛 Spectral Analysis",
        "| Parameter | Value |",
        "|----------|--------|",
        f"| Spectral Centroid | {spectral['spectral_centroid']:.1f} Hz |",
        f"| 85% Rolloff | {spectral['rolloff_85pct']:.1f} Hz |",
        f"| 95% Rolloff | {spectral['rolloff_95pct']:.1f} Hz |",
        f"| Highest Frequency (−60 dB) | {spectral['highest_freq_minus60db']:.1f} Hz |",
        "",
        "---",
        "",
        "## 🔊 Energy Distribution (Speech Frequency Bands)",
        "| Band | Energy % |",
        "|------|----------|",
        f"| <100 Hz | {energy['below_100hz']:.2f}% |",
        f"| 100–500 Hz | {energy['100_500hz']:.2f}% |",
        f"| 500–2k Hz | {energy['500_2khz']:.2f}% |",
        f"| 2k–8k Hz | {energy['2k_8khz']:.2f}% |",
        f"| 8k–12k Hz | {energy['8k_12khz']:.2f}% |",
        f"| 12k–16k Hz | {energy['12k_16khz']:.2f}% |",
        f"| >16k Hz | {energy['above_16khz']:.2f}% |",
        "",
        "---",
        "",
        "## 🤖 Synthetic Voice Estimate (Informational Only)",
        f"- **Probability:** {synthetic.get('synthetic_probability', 0.0):.3f}",
        f"- **Label:** {synthetic.get('synthetic_label', 'unknown')}",
        "",
        "---",
        "",
        f"## ⚠️ Issues Detected: {len(issues)}",
    ]

    if issues:
        for issue, severity, desc in issues:
            icon = _SEVERITY_ICONS.get(severity, "⚪")
            lines.append(f"- {icon} **[{severity}] {issue}** — {desc}")
    else:
        lines.append("- ✅ No issues detected.")

    lines += [
        "",
        "---",
        "",
        f"📊 **PNG Forensic Report Saved:** `{report_name}`  ",
        f"🕒 Generated: {audio_data['timestamp']}",
    ]

    return "\n".join(lines) + "\n"


def analyze_audio(audio_file, progress=gr.Progress()):
    """Analyze an audio file and produce a PNG report plus a Markdown summary.

    Runs the pipeline in this order: file info, waveform load, time-domain
    stats, spectral analysis, loudness (only when ``LOUDNESS_AVAILABLE`` is
    true), issue detection, synthetic-voice estimation, scoring, PNG
    rendering into ``reports/`` and Markdown rendering.

    Args:
        audio_file: Path of the audio file to analyze, or ``None`` when
            nothing was supplied.
        progress: Gradio progress tracker; called with a fraction and a
            ``desc`` label before each stage.

    Returns:
        ``(png_path, markdown)`` on success. ``(None, message)`` when no
        file was given or when any stage raised; in the latter case the
        traceback is printed and the exception text is embedded in the
        message.
    """
    if audio_file is None:
        return None, "⚠️ Please upload an audio file."

    try:
        path = Path(audio_file)

        # File info & load
        progress(0.10, desc="Reading file...")
        info = read_audio_info(str(path))

        progress(0.25, desc="Loading waveform...")
        # sr=None keeps the file's native sample rate; mono=True downmixes.
        y, sr = librosa.load(str(path), sr=None, mono=True)

        # Time-domain
        progress(0.35, desc="Time-domain analysis...")
        time_stats = compute_time_domain_stats(y)

        # Spectral
        progress(0.50, desc="Spectral analysis...")
        spectral = compute_spectral_analysis(y, sr)

        # Loudness
        progress(0.60, desc="Computing LUFS...")
        lufs = compute_loudness(y, sr) if LOUDNESS_AVAILABLE else None

        # Issue detection
        progress(0.70, desc="Detecting issues...")
        issues = detect_audio_issues(spectral, time_stats)

        # Synthetic detection (informational)
        progress(0.78, desc="Synthetic voice estimation...")
        synthetic = detect_synthetic_voice(y, sr, spectral)

        # Scoring
        progress(0.82, desc="Scoring...")
        score_dict = _score_issues(issues)

        # Payload shared by the PNG renderer and the Markdown summary.
        audio_data = {
            "filename": path.name,
            "info": info,
            "time_stats": time_stats,
            "spectral": spectral,
            "lufs": lufs,
            "issues": issues,
            "score": score_dict,
            "synthetic": synthetic,
            "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        }

        # Render the PNG report into ./reports (created on first use).
        progress(0.92, desc="Rendering PNG report...")
        report_dir = Path("reports")
        report_dir.mkdir(exist_ok=True)
        output_file = report_dir / f"{path.stem}_report.png"

        create_report(audio_data, str(output_file))

        return str(output_file), _render_summary(audio_data, output_file.name)

    except Exception as exc:
        # Top-level UI boundary: print the traceback and show the failure in
        # the summary pane instead of letting the exception propagate.
        import traceback
        traceback.print_exc()
        return None, f"# ❌ Analysis Failed\n{str(exc)}"


# Gradio UI
# Layout: upload control and button on the left, PNG report on the right,
# Markdown summary underneath. The button feeds the uploaded file path to
# analyze_audio and routes its two return values to the image and summary.
with gr.Blocks(title="Audio Forensic Analyzer", theme="soft") as demo:
    gr.Markdown("""
# 🎧 AUDIO FORENSIC ANALYZER
Upload an audio file to generate a forensic-quality report:
- HF/LF rolloff detection
- LPF/HPF / Brickwall detection
- Noise-reduction artifacts
- Compression and clipping indicators
- Spectral notches
- LUFS (ITU-R BS.1770-3) check
- Synthetic voice estimation (informational)

Outputs a PNG report + Markdown summary
    """)

    with gr.Row():
        with gr.Column(scale=1):
            # type="filepath" hands analyze_audio a path string, not raw samples.
            audio_in = gr.Audio(label="📁 Upload Audio", type="filepath")
            analyze_btn = gr.Button("🔍 Analyze Audio", variant="primary")
        with gr.Column(scale=2):
            png_out = gr.Image(label="📊 Forensic PNG Report", type="filepath", height=600)

    summary_out = gr.Markdown(label="📋 Summary Report")

    analyze_btn.click(fn=analyze_audio, inputs=[audio_in], outputs=[png_out, summary_out])

if __name__ == "__main__":
    # Listen on all interfaces on port 7860.
    demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True)