Spaces:
Sleeping
Sleeping
File size: 8,256 Bytes
6ad5106 cbc5199 ee0d393 ac95ccb 77bde25 d278c12 ea9bfe4 153e400 703a7e5 ea9bfe4 956259e d278c12 ea9bfe4 956259e ea9bfe4 d278c12 77bde25 42ea287 ea9bfe4 77bde25 d278c12 ea9bfe4 77bde25 d278c12 ea9bfe4 77bde25 6ad5106 956259e 77bde25 ea9bfe4 77bde25 ea9bfe4 77bde25 4f20d78 77bde25 d278c12 77bde25 d278c12 976efb4 77bde25 976efb4 d278c12 976efb4 8186a73 42ea287 d278c12 976efb4 8186a73 42ea287 d278c12 976efb4 8186a73 42ea287 4f20d78 976efb4 8186a73 42ea287 d278c12 976efb4 d278c12 976efb4 ea9bfe4 77bde25 ea9bfe4 6ad5106 ea9bfe4 6ad5106 d278c12 ea9bfe4 6ad5106 956259e 77bde25 42ea287 d278c12 ea9bfe4 956259e 77bde25 d278c12 42ea287 956259e ea9bfe4 42ea287 6ad5106 42ea287 d278c12 ea9bfe4 d278c12 6ad5106 42ea287 77bde25 42ea287 77bde25 42ea287 d278c12 ea9bfe4 42ea287 ea9bfe4 42ea287 ea9bfe4 42ea287 ea9bfe4 42ea287 6ad5106 b8b02b5 956259e 42ea287 77bde25 42ea287 77bde25 6ad5106 ea9bfe4 6ad5106 956259e ea9bfe4 42ea287 ea9bfe4 77bde25 ea9bfe4 42ea287 956259e ea9bfe4 42ea287 d278c12 ea9bfe4 956259e 6ad5106 ea9bfe4 b8b02b5 77bde25 ea9bfe4 6ad5106 42ea287 77bde25 956259e 6ad5106 42ea287 ea9bfe4 42ea287 ea9bfe4 42ea287 ea9bfe4 77bde25 956259e 6ad5106 ea9bfe4 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 
237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 | import gradio as gr
from pathlib import Path
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')
import numpy as np
import librosa
import soundfile as sf
import matplotlib.pyplot as plt
import scipy.signal as sps
# Local Modules (must exist in repo root)
from io_utils import read_audio_info
from time_domain import compute_time_domain_stats
from spectral import compute_spectral_analysis
from loudness_calc import compute_loudness, LOUDNESS_AVAILABLE
from issue_detection import detect_audio_issues
from synthetic_detector import detect_synthetic_voice
from report_generator import create_report
# Severity labels recognised by the scorer, ordered most to least severe.
_SEVERITY_LEVELS = ("CRITICAL", "HIGH", "MEDIUM", "LOW")

# Marker shown in front of each issue line, keyed by severity.
_SEVERITY_ICONS = {"CRITICAL": "🔴", "HIGH": "🟠", "MEDIUM": "🟡", "LOW": "🟢"}

# (minimum score, grade, quality, Matplotlib-safe colour, recommendation).
# Checked top-down; the first band whose minimum is met wins. The final band
# starts at 0, and the score is clamped to >= 0, so a match always exists.
_GRADE_BANDS = (
    (90, "A", "EXCELLENT", "#b3ffb3", "Excellent for TTS dataset"),
    (75, "B", "GOOD", "#ccffcc", "Good quality; suitable for TTS"),
    (60, "C", "FAIR", "#fff6b3", "Fair; contains noticeable processing artifacts"),
    (40, "D", "POOR", "#ffd9b3", "Poor quality; not recommended for TTS"),
    (0, "F", "CRITICAL", "#ffb3b3", "Severely degraded or heavily processed audio"),
)


def _score_issues(issues):
    """Tally issue severities and derive the score dictionary.

    Args:
        issues: Iterable of ``(issue, severity, description)`` triples.

    Returns:
        Dict with the overall score, grade, quality label, recommendation,
        cleanliness score, processing severity, per-severity counts and the
        colour associated with the grade.
    """
    counts = dict.fromkeys(_SEVERITY_LEVELS, 0)
    for _, severity, _ in issues:
        # Unknown severities are ignored, as in the per-level comparisons
        # this single pass replaces.
        if severity in counts:
            counts[severity] += 1
    critical, high, medium, low = (counts[level] for level in _SEVERITY_LEVELS)

    # LOW issues do not affect the headline score, only the cleanliness score.
    score_value = max(0, 100 - critical * 30 - high * 15 - medium * 5)
    grade, quality, color, recommendation = next(
        band[1:] for band in _GRADE_BANDS if score_value >= band[0]
    )

    return {
        "score": score_value,
        "grade": grade,
        "quality": quality,
        "recommendation": recommendation,
        "cleanliness_score": max(0, 100 - (medium * 5 + low * 3)),
        "processing_severity": critical * 3 + high * 2 + medium,
        "critical": critical,
        "high": high,
        "medium": medium,
        "low": low,
        "color": color,
    }


def _render_summary(audio_data, report_name):
    """Build the Markdown summary shown next to the PNG report.

    Args:
        audio_data: The payload assembled by ``analyze_audio``.
        report_name: File name of the rendered PNG report.

    Returns:
        The Markdown text as a single string.
    """
    # NOTE(review): the remaining "π", "π§" and "π€" heading prefixes look
    # like mis-decoded emoji whose distinguishing bytes are not recoverable
    # from this copy of the file. They are kept byte-for-byte; restore them
    # from an intact upstream copy.
    info = audio_data["info"]
    time_stats = audio_data["time_stats"]
    spectral = audio_data["spectral"]
    lufs = audio_data["lufs"]
    issues = audio_data["issues"]
    score = audio_data["score"]
    synthetic = audio_data["synthetic"]

    sections = [f"""
# 🎵 Audio Forensic Summary Report
## π File Information
- **Name:** `{audio_data['filename']}`
- **Duration:** {info['duration']:.2f}s
- **Sample Rate:** {info['samplerate']} Hz
- **Channels:** {info['channels']}
---
## π Loudness (ITU-R BS.1770-3)
"""]

    if lufs is None:
        sections.append("- **Integrated LUFS:** Not available (pyloudnorm missing) \n")
    else:
        sections.append(f"- **Integrated LUFS:** {lufs:.2f} LUFS \n")
        # Compliance window is the -23 LUFS target with a +/-2 LU tolerance.
        if -25 <= lufs <= -21:
            sections.append(" - **Status:** PASS ✅ (Compliant −23 LUFS ±2)\n")
        else:
            sections.append(" - **Status:** FAIL ❌ (Not compliant with −23 LUFS ±2)\n")

    bands = spectral["energy_distribution"]
    sections.append(f"""
---
## 🧪 Audio Quality Score
- **Score:** {score['score']}/100
- **Grade:** {score['grade']}
- **Quality:** {score['quality']}
- **Recommendation:** {score['recommendation']}
---
## π§ Time-Domain Characteristics
| Metric | Value |
|--------|--------|
| Peak Level | {time_stats['peak_db']:.2f} dBFS |
| RMS Level | {time_stats['rms_db']:.2f} dBFS |
| Crest Factor | {time_stats['crest_factor_db']:.2f} dB |
| Noise Floor | {time_stats['noise_floor']:.6f} |
| SNR | {time_stats['snr_db']:.1f} dB |
| ZCR | {time_stats['zero_crossing_rate']:.4f} |
---
## π Spectral Analysis
| Parameter | Value |
|----------|--------|
| Spectral Centroid | {spectral['spectral_centroid']:.1f} Hz |
| 85% Rolloff | {spectral['rolloff_85pct']:.1f} Hz |
| 95% Rolloff | {spectral['rolloff_95pct']:.1f} Hz |
| Highest Frequency (−60 dB) | {spectral['highest_freq_minus60db']:.1f} Hz |
---
## π Energy Distribution (Speech Frequency Bands)
| Band | Energy % |
|------|----------|
| <100 Hz | {bands['below_100hz']:.2f}% |
| 100–500 Hz | {bands['100_500hz']:.2f}% |
| 500–2k Hz | {bands['500_2khz']:.2f}% |
| 2k–8k Hz | {bands['2k_8khz']:.2f}% |
| 8k–12k Hz | {bands['8k_12khz']:.2f}% |
| 12k–16k Hz | {bands['12k_16khz']:.2f}% |
| >16k Hz | {bands['above_16khz']:.2f}% |
---
## π€ Synthetic Voice Estimate (Informational Only)
- **Probability:** {synthetic.get('synthetic_probability', 0.0):.3f}
- **Label:** {synthetic.get('synthetic_label', 'unknown')}
---
## ⚠️ Issues Detected: {len(issues)}
""")

    if issues:
        for issue, severity, description in issues:
            icon = _SEVERITY_ICONS.get(severity, "⚪")
            sections.append(f"- {icon} **[{severity}] {issue}** — {description}\n")
    else:
        sections.append("- ✅ No issues detected.\n")

    sections.append(f"""
---
π **PNG Forensic Report Saved:** `{report_name}`
π Generated: {audio_data['timestamp']}
""")
    return "".join(sections)


def analyze_audio(audio_file, progress=gr.Progress()):
    """Run the full analysis pipeline on one uploaded audio file.

    The file is loaded as mono at its native sample rate, passed through the
    time-domain, spectral, loudness, issue-detection and synthetic-voice
    helpers, scored, rendered to ``reports/<stem>_report.png`` and summarised
    as Markdown.

    Args:
        audio_file: Path of the uploaded file, or ``None`` when nothing was
            uploaded.
        progress: Callable invoked as ``progress(fraction, desc=...)`` before
            each stage.

    Returns:
        ``(png_path, markdown)``. ``png_path`` is ``None`` when no file was
        supplied or when any stage raised; ``markdown`` then carries the
        prompt or the failure message instead of the summary.
    """
    if audio_file is None:
        return None, "⚠️ Please upload an audio file."

    try:
        path = Path(audio_file)

        # File info & load
        progress(0.10, desc="Reading file...")
        info = read_audio_info(str(path))
        progress(0.25, desc="Loading waveform...")
        y, sr = librosa.load(str(path), sr=None, mono=True)

        # Time-domain
        progress(0.35, desc="Time-domain analysis...")
        time_stats = compute_time_domain_stats(y)

        # Spectral
        progress(0.50, desc="Spectral analysis...")
        spectral = compute_spectral_analysis(y, sr)

        # Loudness (skipped when the loudness backend is unavailable)
        progress(0.60, desc="Computing LUFS...")
        lufs = compute_loudness(y, sr) if LOUDNESS_AVAILABLE else None

        # Issue detection
        progress(0.70, desc="Detecting issues...")
        issues = detect_audio_issues(spectral, time_stats)

        # Synthetic detection (informational)
        progress(0.78, desc="Synthetic voice estimation...")
        synthetic = detect_synthetic_voice(y, sr, spectral)

        # Scoring
        progress(0.82, desc="Scoring...")
        score_dict = _score_issues(issues)

        audio_data = {
            "filename": path.name,
            "info": info,
            "time_stats": time_stats,
            "spectral": spectral,
            "lufs": lufs,
            "issues": issues,
            "score": score_dict,
            "synthetic": synthetic,
            "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        }

        # Render the PNG into ./reports, creating the directory on first use.
        progress(0.92, desc="Rendering PNG report...")
        report_dir = Path("reports")
        report_dir.mkdir(exist_ok=True)
        output_file = report_dir / f"{path.stem}_report.png"
        create_report(audio_data, str(output_file))

        return str(output_file), _render_summary(audio_data, output_file.name)
    except Exception as exc:
        # UI boundary: log the traceback server-side and show the message to
        # the user instead of letting the callback crash.
        import traceback
        traceback.print_exc()
        return None, f"# ❌ Analysis Failed\n{exc}"
# Gradio UI
# NOTE(review): the source reached review with its indentation flattened, so
# the nesting below (summary inside the right-hand column, click handler
# registered inside the Blocks context) is reconstructed from syntax — confirm
# against the deployed layout. The "π" / "π§" label prefixes look like
# mis-decoded emoji and are reproduced unchanged here.

# Introductory text rendered at the top of the page.
_INTRO_MARKDOWN = """
# π§ AUDIO FORENSIC ANALYZER
Upload an audio file to generate a forensic-quality report:
- HF/LF rolloff detection
- LPF/HPF / Brickwall detection
- Noise-reduction artifacts
- Compression and clipping indicators
- Spectral notches
- LUFS (ITU-R BS.1770-3) check
- Synthetic voice estimation (informational)
Outputs a PNG report + Markdown summary
"""

with gr.Blocks(title="Audio Forensic Analyzer", theme="soft") as demo:
    gr.Markdown(_INTRO_MARKDOWN)

    with gr.Row():
        # Left column: upload widget and trigger button.
        with gr.Column(scale=1):
            audio_in = gr.Audio(label="π Upload Audio", type="filepath")
            analyze_btn = gr.Button("π Analyze Audio", variant="primary")

        # Right column (twice as wide): rendered PNG and Markdown summary.
        with gr.Column(scale=2):
            png_out = gr.Image(
                label="π Forensic PNG Report",
                type="filepath",
                height=600,
            )
            summary_out = gr.Markdown(label="π Summary Report")

    # analyze_audio returns (png_path, markdown), matching the two outputs.
    analyze_btn.click(
        fn=analyze_audio,
        inputs=[audio_in],
        outputs=[png_out, summary_out],
    )

if __name__ == "__main__":
    # Listen on all interfaces on port 7860 and surface errors in the UI.
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        show_error=True,
    )
|