Spaces:

jackvinati
/

WaveWizard

Sleeping

App Files Files Community

WaveWizard / app.py

jackvinati

Update app.py

ce656ac verified over 1 year ago

raw

history blame contribute delete

11 kB

	import base64
	import html
	import io
	import os

	import gradio as gr
	import librosa
	import librosa.display
	import matplotlib.pyplot as plt
	import numpy as np
	import soundfile as sf


	# Analysis parameters shared by the helpers below.
	_FREQ_RESOLUTION_HZ = 10.0  # desired STFT bin spacing before clamping
	_MIN_N_FFT = 1024  # lower bound on the FFT size
	_MAX_N_FFT = 32768  # upper bound, avoids excessive computation
	_SIGNIFICANCE_THRESHOLD_DB = -80  # relative to the peak of the mean spectrum
	_EPSILON = 1e-10  # keeps log10 and divisions away from zero


	def _render_plot(draw, alt_text):
	    """Draw on a fresh 8x4 figure and return it as an inline PNG <img> tag.

	    ``draw`` is called with no arguments while the new figure is pyplot's
	    current figure. The figure is closed even when drawing or saving
	    raises, so a file that fails mid-plot does not leave a figure open.
	    """
	    fig = plt.figure(figsize=(8, 4))
	    buffer = io.BytesIO()
	    try:
	        draw()
	        plt.tight_layout()
	        fig.savefig(buffer, format='png', bbox_inches='tight')
	    finally:
	        plt.close(fig)
	    encoded = base64.b64encode(buffer.getvalue()).decode('utf-8')
	    return f'<img src="data:image/png;base64,{encoded}" alt="{alt_text}">'


	def _choose_n_fft(sr):
	    """Return the power-of-two FFT size for sample rate ``sr``, clamped."""
	    # max(..., 1) guards log2(0) for sample rates below the target resolution.
	    target = max(int(sr / _FREQ_RESOLUTION_HZ), 1)
	    n_fft = 2 ** int(np.ceil(np.log2(target)))  # next power of two
	    return min(max(n_fft, _MIN_N_FFT), _MAX_N_FFT)


	def _estimate_dynamic_range(y):
	    """Return ``(dynamic_range_db, effective_bits)`` or None if undefined.

	    The estimate is RMS level over the 0.1th percentile of ``|y|``. It is
	    undefined for empty or all-zero signals, where the logarithm would be
	    -inf and the integer conversion would raise.
	    """
	    if y.size == 0:
	        return None
	    signal_rms = np.sqrt(np.mean(y ** 2))
	    if not np.isfinite(signal_rms) or signal_rms <= 0:
	        return None
	    noise_floor = np.percentile(np.abs(y), 0.1)
	    dynamic_range_db = 20 * np.log10(signal_rms / (noise_floor + _EPSILON))
	    return dynamic_range_db, int(np.ceil(dynamic_range_db / 6.02))


	def _analyze_single_file(audio_file):
	    """Return the HTML report (summary table plus five plots) for one file.

	    Exceptions raised while loading, analysing or plotting propagate to
	    the caller, which decides how to report them.
	    """
	    # sr=None keeps the file's native sample rate instead of resampling.
	    y, sr = librosa.load(audio_file, sr=None)

	    # Sample format as reported by the file itself.
	    with sf.SoundFile(audio_file) as f:
	        bit_depth_info = f.subtype_info

	    duration = len(y) / sr

	    n_fft = _choose_n_fft(sr)
	    hop_length = n_fft // 4
	    S = np.abs(librosa.stft(y, n_fft=n_fft, hop_length=hop_length))
	    S_db = librosa.amplitude_to_db(S, ref=np.max)

	    # Time-averaged magnitude spectrum and the frequency of each bin.
	    S_mean = np.mean(S, axis=1)
	    freqs = np.linspace(0, sr / 2, len(S_mean))

	    spectral_centroids = librosa.feature.spectral_centroid(y=y, sr=sr)[0]
	    spectral_bandwidth = librosa.feature.spectral_bandwidth(y=y, sr=sr)[0]
	    spectral_rolloff = librosa.feature.spectral_rolloff(
	        y=y, sr=sr, roll_percent=0.85)[0]
	    # times_like assumes sr=22050 unless told otherwise; pass the real rate
	    # so the time axis matches audio loaded at its native sample rate.
	    times = librosa.times_like(spectral_centroids, sr=sr)

	    def draw_waveform():
	        librosa.display.waveshow(y, sr=sr, alpha=0.5)
	        plt.title('Waveform', fontsize=14)
	        plt.xlabel('Time (s)', fontsize=12)
	        plt.ylabel('Amplitude', fontsize=12)

	    def draw_spectral_features():
	        plt.semilogy(times, spectral_centroids, label='Spectral Centroid')
	        plt.semilogy(times, spectral_bandwidth, label='Spectral Bandwidth')
	        plt.semilogy(times, spectral_rolloff,
	                     label='Spectral Rolloff', linestyle='--')
	        plt.title('Spectral Features', fontsize=14)
	        plt.xlabel('Time (s)', fontsize=12)
	        plt.ylabel('Hz', fontsize=12)
	        plt.legend(loc='upper right')

	    def draw_spectrum():
	        plt.semilogx(freqs, 20 * np.log10(S_mean + _EPSILON))  # avoid log(0)
	        plt.xlabel('Frequency (Hz)', fontsize=12)
	        plt.ylabel('Amplitude (dB)', fontsize=12)
	        plt.title('Frequency Spectrum', fontsize=14)
	        plt.grid(True, which='both', ls='--')
	        plt.xlim(20, sr / 2)

	    def draw_spectrogram():
	        librosa.display.specshow(
	            S_db, sr=sr, x_axis='time', y_axis='linear', hop_length=hop_length)
	        plt.colorbar(format='%+2.0f dB')
	        plt.title('Spectrogram', fontsize=14)
	        plt.xlabel('Time (s)', fontsize=12)
	        plt.ylabel('Frequency (Hz)', fontsize=12)

	    def draw_histogram():
	        plt.hist(y, bins=1000, alpha=0.7, color='blue',
	                 edgecolor='black', log=True)
	        plt.xlabel('Amplitude', fontsize=12)
	        plt.ylabel('Count (log scale)', fontsize=12)
	        plt.title('Histogram of Sample Amplitudes', fontsize=14)
	        plt.grid(True)

	    # Highest bin whose mean magnitude is within 80 dB of the spectrum peak;
	    # twice that frequency is reported as the "real" sample rate (Nyquist).
	    peak_db = 20 * np.log10(np.max(S_mean + _EPSILON))
	    threshold_amplitude = 10 ** ((peak_db + _SIGNIFICANCE_THRESHOLD_DB) / 20)
	    significant_indices = np.where(S_mean >= threshold_amplitude)[0]
	    if significant_indices.size > 0:
	        highest_freq = freqs[significant_indices[-1]]
	        significant_freq_text = f"{highest_freq:.2f} Hz"
	        estimated_sample_rate_text = f"{highest_freq * 2 / 1000:.2f} kHz"
	    else:
	        significant_freq_text = "No significant frequency content detected."
	        estimated_sample_rate_text = "N/A"

	    dynamic_range = _estimate_dynamic_range(y)
	    if dynamic_range is None:
	        dynamic_range_text = "N/A"
	        bit_depth_text = "N/A"
	    else:
	        dynamic_range_text = f"{dynamic_range[0]:.2f} dB"
	        bit_depth_text = f"{dynamic_range[1]} bits PCM"

	    rows = [
	        ("File Bit Depth:", html.escape(str(bit_depth_info))),
	        ("Sample Rate:", f"{sr} Hz"),
	        ("Duration:", f"{duration:.2f} seconds"),
	        ("Using n_fft =", n_fft),
	        ("Significant frequency content up to:", significant_freq_text),
	        ("Estimated Real Sample Rate:", estimated_sample_rate_text),
	        ("Estimated Dynamic Range:", dynamic_range_text),
	        ("Estimated Effective Bit Depth:", bit_depth_text),
	    ]
	    # Each plot's section heading doubles as the image alt text.
	    plots = [
	        ("Waveform", draw_waveform),
	        ("Spectral Features", draw_spectral_features),
	        ("Frequency Spectrum", draw_spectrum),
	        ("Spectrogram", draw_spectrogram),
	        ("Histogram of Sample Amplitudes", draw_histogram),
	    ]

	    # The file name comes from the user, so escape it before embedding.
	    safe_name = html.escape(os.path.basename(audio_file))
	    parts = [
	        f'<h3 style="font-size:22px;">{safe_name}</h3>\n',
	        '<table style="font-size:18px;">\n',
	    ]
	    parts.extend(
	        f"<tr><td><strong>{label}</strong></td><td>{value}</td></tr>\n"
	        for label, value in rows)
	    parts.append("</table>\n")
	    for title, draw in plots:
	        parts.append(f'<h4 style="font-size:20px;">{title}</h4>\n')
	        parts.append(_render_plot(draw, title) + "\n")
	    parts.append("<hr>\n")
	    return "".join(parts)


	def analyze_audio_files(files, folder_path):
	    """Analyse audio files and return one combined HTML report.

	    Args:
	        files: Iterable of audio file paths, or None/empty for no uploads.
	        folder_path: Optional directory path; every regular file directly
	            inside it is analysed as well (in sorted name order).

	    Returns:
	        An HTML string with one section per file. A file that fails to
	        load or analyse contributes a "File / Error" entry instead of
	        aborting the run. If ``folder_path`` is given but is not a
	        directory, only a "Folder not found" message is returned and no
	        file is analysed. With no inputs the result is an empty string.
	    """
	    file_paths = []
	    if files:
	        file_paths.extend(files)
	    if folder_path:
	        if not os.path.isdir(folder_path):
	            # The path is user-typed text; escape it before echoing it back.
	            return f"<p><strong>Folder not found:</strong> {html.escape(str(folder_path))}</p>"
	        for entry in sorted(os.listdir(folder_path)):
	            candidate = os.path.join(folder_path, entry)
	            if os.path.isfile(candidate):
	                file_paths.append(candidate)

	    sections = []
	    for audio_file in file_paths:
	        try:
	            sections.append(_analyze_single_file(audio_file))
	        except Exception as e:
	            # Per-file boundary: report the failure and continue with the
	            # remaining files. Both fields may contain user-controlled text.
	            safe_name = html.escape(os.path.basename(audio_file))
	            safe_error = html.escape(str(e))
	            sections.append(
	                f"<p><strong>File:</strong> {safe_name}</p><p><strong>Error:</strong> {safe_error}</p><hr>")

	    return "".join(sections)


	def analyze_wrapper(files, folder_path):
	    """Click handler: pass the uploaded files and folder path to the analyser."""
	    return analyze_audio_files(files, folder_path)


	# Page layout and event wiring.
	# NOTE(review): the original indentation was lost, so which components sit
	# inside the Row is an assumption (the two inputs only) - confirm.
	with gr.Blocks() as demo:
	    gr.Markdown("Wave Wizard")
	    gr.Markdown(
	        "Upload one or more audio files, or specify a folder containing audio files.")

	    with gr.Row():
	        file_input = gr.Files(
	            label="Upload Audio Files",
	            type="filepath",
	            file_count="multiple",
	        )
	        folder_input = gr.Textbox(
	            label="Folder Path (optional)",
	            placeholder="Enter folder path containing audio files",
	        )

	    analyze_button = gr.Button("Analyze")
	    output_display = gr.HTML()

	    # The returned HTML report is shown in the HTML component.
	    analyze_button.click(
	        analyze_wrapper,
	        inputs=[file_input, folder_input],
	        outputs=output_display,
	    )

	demo.launch()