# Author: Aynursusuz — "Ultra simple Interface version" (commit 8b61ed4)
import io

import gradio as gr
import librosa
import librosa.display  # explicit import: librosa.display.specshow is used below
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
from scipy.stats import kurtosis, skew
def analyze_audio(audio_file, snr, rms, peak, thd, spectral, mfcc_check):
    """Compute selected quality metrics and a visualization for an audio file.

    Parameters
    ----------
    audio_file : str | None
        Filesystem path to the uploaded audio (Gradio ``type="filepath"``).
    snr, rms, peak, thd, spectral, mfcc_check : bool
        Checkbox flags selecting which metrics appear in the report.

    Returns
    -------
    tuple
        (PIL image with waveform + spectrogram, markdown metrics report),
        or (None, error message) when input is missing or analysis fails.
    """
    if audio_file is None:
        return None, "Please upload an audio file"
    try:
        # Load at native sample rate, downmixed to mono.
        y, sr = librosa.load(audio_file, sr=None, mono=True)
        # Reject clips shorter than 100 ms — too little data for the metrics.
        if len(y) < sr * 0.1:
            return None, "Audio file too short"

        # --- Basic level metrics -------------------------------------------
        # Noise floor estimated as the 5th percentile of |y|; the 1e-10
        # epsilon guards against divide-by-zero / log(0) on digital silence.
        noise_floor = np.percentile(np.abs(y), 5)
        signal_power = np.mean(y ** 2)
        noise_power = noise_floor ** 2
        snr_val = 10 * np.log10(signal_power / (noise_power + 1e-10))
        rms_val = np.sqrt(signal_power)  # same as sqrt(mean(y**2)), reuses power
        peak_val = 20 * np.log10(np.max(np.abs(y)) + 1e-10)

        # --- THD (simplified single-tone estimate) -------------------------
        # Strongest non-DC FFT bin is taken as the fundamental; harmonic
        # power is summed at integer multiples (2nd through 5th harmonic).
        fft = np.abs(np.fft.rfft(y))
        if len(fft) > 1:
            fundamental_idx = np.argmax(fft[1:]) + 1
            fundamental_power = fft[fundamental_idx] ** 2
            harmonic_indices = [
                k * fundamental_idx
                for k in range(2, 6)
                if k * fundamental_idx < len(fft)
            ]
            harmonic_power = np.sum([fft[idx] ** 2 for idx in harmonic_indices])
            thd_val = np.sqrt(harmonic_power / (fundamental_power + 1e-10)) * 100
        else:
            thd_val = 0.0

        # --- Spectral / cepstral features ----------------------------------
        spec_centroid = np.mean(librosa.feature.spectral_centroid(y=y, sr=sr)[0])
        mfcc_feat = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
        mfcc_mean = np.mean(mfcc_feat)

        # --- Markdown report: only the metrics the user ticked -------------
        output = "## Audio Quality Metrics\n\n"
        if snr:
            output += f"**SNR:** {snr_val:.2f} dB\n"
        if rms:
            output += f"**RMS Energy:** {rms_val:.4f}\n"
        if peak:
            output += f"**Peak Level:** {peak_val:.2f} dB\n"
        if thd:
            output += f"**THD:** {thd_val:.2f}%\n"
        if spectral:
            output += f"**Spectral Centroid:** {spec_centroid:.2f} Hz\n"
        if mfcc_check:
            output += f"**MFCC Mean:** {mfcc_mean:.4f}\n"

        # --- Visualization: waveform on top, spectrogram below -------------
        fig, axes = plt.subplots(2, 1, figsize=(10, 6))
        try:
            time = np.linspace(0, len(y) / sr, len(y))
            axes[0].plot(time, y, linewidth=0.5)
            axes[0].set_title('Waveform')
            axes[0].set_xlabel('Time (s)')
            axes[0].grid(True, alpha=0.3)

            # Requires librosa.display (imported at module level).
            D = librosa.amplitude_to_db(np.abs(librosa.stft(y)), ref=np.max)
            librosa.display.specshow(D, sr=sr, x_axis='time', y_axis='hz', ax=axes[1])
            axes[1].set_title('Spectrogram')
            fig.tight_layout()

            # Render via an in-memory PNG so Gradio gets a plain PIL image.
            buf = io.BytesIO()
            fig.savefig(buf, format='png', dpi=100, bbox_inches='tight')
            buf.seek(0)
            img = Image.open(buf)
        finally:
            # Close THIS figure even if plotting fails, so repeated requests
            # don't leak matplotlib figures (plt.close() only closed the
            # "current" figure and was skipped entirely on error).
            plt.close(fig)
        return img, output
    except Exception as e:
        # UI boundary: report any failure as a message instead of crashing
        # the Gradio worker.
        return None, f"Error: {str(e)}"
# Build the Gradio UI: one audio-file input plus one checkbox per metric;
# outputs are the rendered figure and the markdown metrics report.
iface = gr.Interface(
    fn=analyze_audio,
    inputs=[
        gr.Audio(label="Upload Audio File", type="filepath"),
        gr.Checkbox(label="SNR (Signal-to-Noise Ratio)", value=True),
        gr.Checkbox(label="RMS Energy", value=True),
        gr.Checkbox(label="Peak Level", value=True),
        gr.Checkbox(label="THD (Total Harmonic Distortion)", value=True),
        gr.Checkbox(label="Spectral Centroid", value=False),
        gr.Checkbox(label="MFCC", value=False),
    ],
    outputs=[
        gr.Image(label="Visualization", type="pil"),
        gr.Textbox(label="Metrics", lines=10)
    ],
    title="Audio Quality Assessment",
    description="Upload an audio file and select metrics to analyze"
)

if __name__ == "__main__":
    # Guard the launch so importing this module doesn't start a server.
    # share=True publishes a temporary public gradio.live URL.
    iface.launch(share=True)