Spaces:

Aynursusuz
/

Audio-Quality-Assessment

Build error

App Files Files Community

Aynursusuz committed on Dec 19, 2025

Commit

8b61ed4

1 Parent(s): 0676533

Ultra simple Interface version

Browse files

Files changed (1) hide show

app.py +95 -191

app.py CHANGED Viewed

@@ -6,205 +6,109 @@ import io
 from PIL import Image
 from scipy.stats import kurtosis, skew
-ALL_METRICS = [
-    "SNR (dB)", "RMS Energy", "Peak Level (dB)", "Crest Factor", "Dynamic Range (dB)",
-    "Zero Crossing Rate", "Spectral Centroid (Hz)", "Spectral Rolloff (Hz)",
-    "Spectral Bandwidth (Hz)", "Spectral Flatness", "Spectral Contrast (Mean)",
-    "Harmonic-to-Noise Ratio (dB)", "THD (%)", "Tempo (BPM)", "Onset Strength (Mean)",
-    "Kurtosis", "Skewness", "Fundamental Frequency (Hz)", "Loudness (approx LUFS)",
-    "Silence Ratio", "Clipping Ratio (%)", "MFCC Mean", "MFCC Std Dev",
-    "Chroma Mean", "Chroma Std Dev"
-]
-def calculate_metrics(y, sr):
-    metrics = {}
-    # Amplitude & Energy
-    noise_floor = np.percentile(np.abs(y), 5)
-    signal_power = np.mean(y ** 2)
-    noise_power = noise_floor ** 2
-    snr = 10 * np.log10(signal_power / (noise_power + 1e-10))
-    metrics['SNR (dB)'] = round(snr, 2)
-    rms = np.sqrt(np.mean(y ** 2))
-    metrics['RMS Energy'] = round(rms, 4)
-    metrics['Peak Level (dB)'] = round(20 * np.log10(np.max(np.abs(y)) + 1e-10), 2)
-    metrics['Crest Factor'] = round(np.max(np.abs(y)) / (rms + 1e-10), 2)
-    dynamic_range = 20 * np.log10(np.max(np.abs(y)) / (np.min(np.abs(y[y != 0])) + 1e-10))
-    metrics['Dynamic Range (dB)'] = round(dynamic_range, 2)
-    # Spectral
-    metrics['Zero Crossing Rate'] = round(np.mean(librosa.feature.zero_crossing_rate(y)), 4)
-    metrics['Spectral Centroid (Hz)'] = round(np.mean(librosa.feature.spectral_centroid(y=y, sr=sr)[0]), 2)
-    metrics['Spectral Rolloff (Hz)'] = round(np.mean(librosa.feature.spectral_rolloff(y=y, sr=sr)[0]), 2)
-    metrics['Spectral Bandwidth (Hz)'] = round(np.mean(librosa.feature.spectral_bandwidth(y=y, sr=sr)[0]), 2)
-    metrics['Spectral Flatness'] = round(np.mean(librosa.feature.spectral_flatness(y=y)[0]), 4)
-    metrics['Spectral Contrast (Mean)'] = round(np.mean(librosa.feature.spectral_contrast(y=y, sr=sr)), 2)
-    # Harmonic
-    y_harmonic, y_percussive = librosa.effects.hpss(y)
-    harmonic_power = np.mean(y_harmonic ** 2)
-    percussive_power = np.mean(y_percussive ** 2)
-    hnr = 10 * np.log10((harmonic_power + 1e-10) / (percussive_power + 1e-10))
-    metrics['Harmonic-to-Noise Ratio (dB)'] = round(hnr, 2)
-    fft = np.abs(np.fft.rfft(y))
-    if len(fft) > 1:
-        fundamental_idx = np.argmax(fft[1:]) + 1
-        fundamental_power = fft[fundamental_idx] ** 2
-        harmonic_indices = [i * fundamental_idx for i in range(2, 6) if i * fundamental_idx < len(fft)]
-        harmonic_power = np.sum([fft[idx] ** 2 for idx in harmonic_indices])
-        thd = np.sqrt(harmonic_power / (fundamental_power + 1e-10)) * 100
-        metrics['THD (%)'] = round(min(thd, 100), 2)
-    else:
-        metrics['THD (%)'] = 0.0
-    # Temporal
-    try:
-        tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
-        metrics['Tempo (BPM)'] = round(float(tempo), 1)
-    except:
-        metrics['Tempo (BPM)'] = 0.0
-    metrics['Onset Strength (Mean)'] = round(np.mean(librosa.onset.onset_strength(y=y, sr=sr)), 4)
-    # Statistical
-    metrics['Kurtosis'] = round(kurtosis(y), 2)
-    metrics['Skewness'] = round(skew(y), 2)
-    # Frequency
     try:
-        f0 = librosa.yin(y, fmin=50, fmax=400, sr=sr)
-        f0_mean = np.nanmean(f0[f0 > 0])
-        metrics['Fundamental Frequency (Hz)'] = round(f0_mean, 2) if not np.isnan(f0_mean) else 0.0
-    except:
-        metrics['Fundamental Frequency (Hz)'] = 0.0
-    # Perceptual
-    metrics['Loudness (approx LUFS)'] = round(20 * np.log10(rms + 1e-10), 2)
-    silence_threshold = 0.01
-    metrics['Silence Ratio'] = round(np.sum(np.abs(y) < silence_threshold) / len(y), 4)
-    clipping_threshold = 0.99
-    metrics['Clipping Ratio (%)'] = round(np.sum(np.abs(y) > clipping_threshold) / len(y) * 100, 4)
-    # MFCC & Chroma
-    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
-    metrics['MFCC Mean'] = round(np.mean(mfcc), 4)
-    metrics['MFCC Std Dev'] = round(np.std(mfcc), 4)
-    chroma = librosa.feature.chroma_stft(y=y, sr=sr)
-    metrics['Chroma Mean'] = round(np.mean(chroma), 4)
-    metrics['Chroma Std Dev'] = round(np.std(chroma), 4)
-    # Quality Score
-    quality_score = (
-        min(max(snr, 0), 40) / 40 * 20 +
-        (1 - min(metrics['THD (%)'], 10) / 10) * 15 +
-        min(max(dynamic_range, 0), 60) / 60 * 15 +
-        min(rms, 0.7) / 0.7 * 10 +
-        (1 - metrics['Spectral Flatness']) * 10 +
-        (1 - min(metrics['Clipping Ratio (%)'], 100) / 100) * 15 +
-        min(max(hnr, -10), 20) / 30 * 15
-    )
-    metrics['Overall Quality Score'] = round(quality_score, 1)
-    return metrics
-def create_viz(y, sr, selected):
-    show_wave = any(m in selected for m in ["SNR (dB)", "RMS Energy", "Peak Level (dB)"])
-    show_spec = "Spectral Centroid (Hz)" in selected
-    show_mel = "Spectral Flatness" in selected
-    show_mfcc = "MFCC Mean" in selected
-    plots = [show_wave, show_spec, show_mel, show_mfcc]
-    num = sum(plots) or 2
-    fig, axes = plt.subplots((num + 1) // 2, 2, figsize=(12, 4 * ((num + 1) // 2)))
-    if num == 1:
-        axes = [axes]
-    else:
-        axes = axes.flatten()
-    idx = 0
-    if show_wave:
-        axes[idx].plot(np.linspace(0, len(y)/sr, len(y)), y, linewidth=0.5)
-        axes[idx].set_title('Waveform')
-        axes[idx].set_xlabel('Time (s)')
-        axes[idx].grid(True, alpha=0.3)
-        idx += 1
-    if show_spec:
-        D = librosa.amplitude_to_db(np.abs(librosa.stft(y)), ref=np.max)
-        librosa.display.specshow(D, sr=sr, x_axis='time', y_axis='hz', ax=axes[idx])
-        axes[idx].set_title('Spectrogram')
-        idx += 1
-    if show_mel:
-        S = librosa.feature.melspectrogram(y=y, sr=sr)
-        S_dB = librosa.power_to_db(S, ref=np.max)
-        librosa.display.specshow(S_dB, sr=sr, x_axis='time', y_axis='mel', ax=axes[idx])
-        axes[idx].set_title('Mel Spectrogram')
-        idx += 1
-    if show_mfcc:
-        mfcc = librosa.feature.mfcc(y=y, sr=sr)
-        librosa.display.specshow(mfcc, sr=sr, x_axis='time', ax=axes[idx])
-        axes[idx].set_title('MFCC')
-        idx += 1
-    for i in range(idx, len(axes)):
-        axes[i].axis('off')
-    plt.tight_layout()
-    buf = io.BytesIO()
-    plt.savefig(buf, format='png', dpi=150, bbox_inches='tight')
-    buf.seek(0)
-    img = Image.open(buf)
-    plt.close()
-    return img
-def analyze(audio, selected):
-    if not audio or not selected:
-        return None, "Please upload audio and select metrics"
-    try:
-        y, sr = librosa.load(audio, sr=None, mono=True)
         if len(y) < sr * 0.1:
-            return None, "Audio too short"
-        all_metrics = calculate_metrics(y, sr)
-        viz = create_viz(y, sr, selected)
-        score = all_metrics['Overall Quality Score']
-        status = "Excellent" if score >= 80 else "Good" if score >= 60 else "Fair" if score >= 40 else "Poor"
-        output = f"## Quality Score: {score}/100 ({status})\n\n"
-        for k, v in all_metrics.items():
-            if k in selected or k == "Overall Quality Score":
-                output += f"**{k}:** {v}\n"
-        return viz, output
     except Exception as e:
         return None, f"Error: {str(e)}"
-with gr.Blocks(title="Audio Quality Assessment") as demo:
-    gr.Markdown("# Audio Quality Assessment\n### Professional audio analysis")
-    with gr.Row():
-        with gr.Column():
-            audio = gr.Audio(label="Upload Audio", type="filepath")
-            metrics = gr.CheckboxGroup(
-                choices=ALL_METRICS,
-                value=["SNR (dB)", "RMS Energy", "THD (%)"],
-                label="Select Metrics"
-            )
-            btn = gr.Button("Analyze", variant="primary")
-        with gr.Column():
-            img = gr.Image(label="Visualization")
-            text = gr.Textbox(label="Metrics", lines=15)
-    btn.click(analyze, inputs=[audio, metrics], outputs=[img, text])
-demo.launch(share=True)

 from PIL import Image
 from scipy.stats import kurtosis, skew
_EPS = 1e-10  # keeps log10 arguments and denominators strictly positive


def _thd_percent(y):
    """Return a rough total-harmonic-distortion estimate in percent.

    The strongest non-DC bin of the real FFT of ``y`` is taken as the
    fundamental; the power found at 2x-5x that bin index (where those bins
    exist) is compared with the fundamental's power.
    """
    spectrum = np.abs(np.fft.rfft(y))
    if len(spectrum) <= 1:
        return 0.0
    fundamental_idx = int(np.argmax(spectrum[1:])) + 1  # +1: bin 0 (DC) was skipped
    fundamental_power = spectrum[fundamental_idx] ** 2
    harmonic_power = sum(
        spectrum[k * fundamental_idx] ** 2
        for k in range(2, 6)
        if k * fundamental_idx < len(spectrum)
    )
    return float(np.sqrt(harmonic_power / (fundamental_power + _EPS)) * 100)


def _render_overview(y, sr):
    """Draw the waveform and spectrogram of ``y`` and return them as a PIL image."""
    # NOTE(review): `librosa.display` is used below; the file's import block is
    # outside this view -- confirm it is imported (older librosa releases need
    # an explicit `import librosa.display`).
    fig, axes = plt.subplots(2, 1, figsize=(10, 6))
    try:
        # Waveform
        time_axis = np.linspace(0, len(y) / sr, len(y))
        axes[0].plot(time_axis, y, linewidth=0.5)
        axes[0].set_title('Waveform')
        axes[0].set_xlabel('Time (s)')
        axes[0].grid(True, alpha=0.3)

        # Spectrogram
        spec_db = librosa.amplitude_to_db(np.abs(librosa.stft(y)), ref=np.max)
        librosa.display.specshow(spec_db, sr=sr, x_axis='time', y_axis='hz', ax=axes[1])
        axes[1].set_title('Spectrogram')

        # Operate on `fig` itself rather than pyplot's implicit current figure.
        fig.tight_layout()
        buf = io.BytesIO()
        fig.savefig(buf, format='png', dpi=100, bbox_inches='tight')
    finally:
        # Always release the figure, even if plotting failed; the caller turns
        # exceptions into an error message, so a leak would otherwise be silent.
        plt.close(fig)

    buf.seek(0)
    img = Image.open(buf)
    img.load()  # decode now instead of lazily on first access
    return img


def analyze_audio(audio_file, snr, rms, peak, thd, spectral, mfcc_check):
    """Analyze an audio file and report the selected quality metrics.

    Args:
        audio_file: Path of the audio file to load, or ``None`` when nothing
            was uploaded.
        snr: Include the SNR estimate (noise floor = 5th percentile of ``|y|``).
        rms: Include the RMS energy.
        peak: Include the peak level in dB.
        thd: Include the THD estimate.
        spectral: Include the mean spectral centroid.
        mfcc_check: Include the mean over 13 MFCC coefficients.

    Returns:
        A ``(image, text)`` tuple. ``image`` is a PIL image with the waveform
        and spectrogram, or ``None`` when no analysis was produced. ``text``
        is the metrics report, or a message explaining why there is none.
        Exceptions raised during analysis are reported in ``text`` instead of
        propagating, so the UI always receives a displayable result.
    """
    if audio_file is None:
        return None, "Please upload an audio file"

    try:
        # Load audio at its native sample rate, mixed down to mono.
        y, sr = librosa.load(audio_file, sr=None, mono=True)
        if len(y) < sr * 0.1:  # fewer samples than 0.1 s of audio
            return None, "Audio file too short"

        # Each metric is computed only when requested; the spectral and MFCC
        # features in particular are costly and were previously always run.
        report = ["## Audio Quality Metrics\n\n"]
        if snr:
            noise_floor = np.percentile(np.abs(y), 5)
            signal_power = np.mean(y ** 2)
            noise_power = noise_floor ** 2
            # Epsilon on both sides so an all-zero signal yields 0 dB, not -inf.
            snr_val = 10 * np.log10((signal_power + _EPS) / (noise_power + _EPS))
            report.append(f"**SNR:** {snr_val:.2f} dB\n")
        if rms:
            rms_val = np.sqrt(np.mean(y ** 2))
            report.append(f"**RMS Energy:** {rms_val:.4f}\n")
        if peak:
            peak_val = 20 * np.log10(np.max(np.abs(y)) + _EPS)
            report.append(f"**Peak Level:** {peak_val:.2f} dB\n")
        if thd:
            report.append(f"**THD:** {_thd_percent(y):.2f}%\n")
        if spectral:
            spec_centroid = np.mean(librosa.feature.spectral_centroid(y=y, sr=sr)[0])
            report.append(f"**Spectral Centroid:** {spec_centroid:.2f} Hz\n")
        if mfcc_check:
            mfcc_feat = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
            report.append(f"**MFCC Mean:** {np.mean(mfcc_feat):.4f}\n")

        return _render_overview(y, sr), "".join(report)
    except Exception as e:
        # Deliberate catch-all at the UI boundary: surface the failure as text.
        return None, f"Error: {str(e)}"
# Build the Gradio UI: one audio upload followed by one checkbox per metric,
# in the same order as the boolean parameters of `analyze_audio`.
_METRIC_CHECKBOXES = (
    # (label, checked by default)
    ("SNR (Signal-to-Noise Ratio)", True),
    ("RMS Energy", True),
    ("Peak Level", True),
    ("THD (Total Harmonic Distortion)", True),
    ("Spectral Centroid", False),
    ("MFCC", False),
)

iface = gr.Interface(
    fn=analyze_audio,
    inputs=[gr.Audio(label="Upload Audio File", type="filepath")]
    + [
        gr.Checkbox(label=label, value=default)
        for label, default in _METRIC_CHECKBOXES
    ],
    outputs=[
        gr.Image(label="Visualization", type="pil"),
        gr.Textbox(label="Metrics", lines=10),
    ],
    title="Audio Quality Assessment",
    description="Upload an audio file and select metrics to analyze",
)

# NOTE(review): the surrounding page shows this app hosted as a Hugging Face
# Space; confirm whether `share=True` is still wanted in that environment.
iface.launch(share=True)