Spaces:
Sleeping
Sleeping
| import numpy as np | |
| import librosa | |
| import librosa.display | |
| import matplotlib.pyplot as plt | |
| import soundfile as sf | |
| import gradio as gr | |
| import io | |
| import os | |
| import base64 | |
| def analyze_audio_files(files, folder_path): | |
| output_html = "" | |
| file_paths = [] | |
| # Handle inputs: files can be a list of file paths or a folder path | |
| if files: | |
| file_paths.extend(files) | |
| if folder_path: | |
| if os.path.isdir(folder_path): | |
| folder_files = [os.path.join(folder_path, f) for f in os.listdir(folder_path) | |
| if os.path.isfile(os.path.join(folder_path, f))] | |
| file_paths.extend(folder_files) | |
| else: | |
| return f"<p><strong>Folder not found:</strong> {folder_path}</p>" | |
| for audio_file in file_paths: | |
| try: | |
| # Load the audio file | |
| y, sr = librosa.load(audio_file, sr=None) | |
| # Get original bit depth from file metadata | |
| with sf.SoundFile(audio_file) as f: | |
| bit_depth_info = f.subtype_info | |
| # Time domain analysis | |
| duration = len(y) / sr | |
| # Frequency domain analysis | |
| desired_freq_resolution = 10.0 # in Hz | |
| # Calculate n_fft, limit it to a reasonable range | |
| n_fft = int(sr / desired_freq_resolution) | |
| n_fft = 2 ** int(np.ceil(np.log2(n_fft))) # Next power of two | |
| # Set maximum and minimum n_fft to avoid excessive computation | |
| max_n_fft = 32768 | |
| min_n_fft = 1024 | |
| n_fft = min(max(n_fft, min_n_fft), max_n_fft) | |
| hop_length = n_fft // 4 | |
| # Compute the Short-Time Fourier Transform (STFT) | |
| S = np.abs(librosa.stft(y, n_fft=n_fft, hop_length=hop_length)) | |
| # Compute the spectrogram (in dB) | |
| S_db = librosa.amplitude_to_db(S, ref=np.max) | |
| # Average over time to get the frequency spectrum | |
| S_mean = np.mean(S, axis=1) | |
| freqs = np.linspace(0, sr / 2, len(S_mean)) | |
| # Plot the waveform | |
| fig_waveform = plt.figure(figsize=(8, 4)) | |
| librosa.display.waveshow(y, sr=sr, alpha=0.5) | |
| plt.title('Waveform', fontsize=14) | |
| plt.xlabel('Time (s)', fontsize=12) | |
| plt.ylabel('Amplitude', fontsize=12) | |
| plt.tight_layout() | |
| waveform_image = io.BytesIO() | |
| plt.savefig(waveform_image, format='png', bbox_inches='tight') | |
| plt.close(fig_waveform) | |
| waveform_image.seek(0) | |
| waveform_base64 = base64.b64encode( | |
| waveform_image.read()).decode('utf-8') | |
| waveform_html = f'<img src="data:image/png;base64,{waveform_base64}" alt="Waveform">' | |
| # Calculate spectral features: spectral centroid, spectral bandwidth, and spectral rolloff | |
| spectral_centroids = librosa.feature.spectral_centroid(y=y, sr=sr)[ | |
| 0] | |
| spectral_bandwidth = librosa.feature.spectral_bandwidth(y=y, sr=sr)[ | |
| 0] | |
| spectral_rolloff = librosa.feature.spectral_rolloff( | |
| y=y, sr=sr, roll_percent=0.85)[0] | |
| times = librosa.times_like(spectral_centroids) | |
| # Plot the spectral features | |
| fig_spectral_features = plt.figure(figsize=(8, 4)) | |
| plt.semilogy(times, spectral_centroids, label='Spectral Centroid') | |
| plt.semilogy(times, spectral_bandwidth, label='Spectral Bandwidth') | |
| plt.semilogy(times, spectral_rolloff, | |
| label='Spectral Rolloff', linestyle='--') | |
| plt.title('Spectral Features', fontsize=14) | |
| plt.xlabel('Time (s)', fontsize=12) | |
| plt.ylabel('Hz', fontsize=12) | |
| plt.legend(loc='upper right') | |
| plt.tight_layout() | |
| spectral_features_image = io.BytesIO() | |
| plt.savefig(spectral_features_image, | |
| format='png', bbox_inches='tight') | |
| plt.close(fig_spectral_features) | |
| spectral_features_image.seek(0) | |
| spectral_features_base64 = base64.b64encode( | |
| spectral_features_image.read()).decode('utf-8') | |
| spectral_features_html = f'<img src="data:image/png;base64,{spectral_features_base64}" alt="Spectral Features">' | |
| # Plot the frequency spectrum | |
| fig1 = plt.figure(figsize=(8, 4)) | |
| plt.semilogx(freqs, 20 * np.log10(S_mean + 1e-10)) # Avoid log(0) | |
| plt.xlabel('Frequency (Hz)', fontsize=12) | |
| plt.ylabel('Amplitude (dB)', fontsize=12) | |
| plt.title('Frequency Spectrum', fontsize=14) | |
| plt.grid(True, which='both', ls='--') | |
| plt.xlim(20, sr / 2) | |
| plt.tight_layout() | |
| spectrum_image = io.BytesIO() | |
| plt.savefig(spectrum_image, format='png', bbox_inches='tight') | |
| plt.close(fig1) | |
| spectrum_image.seek(0) | |
| spectrum_base64 = base64.b64encode( | |
| spectrum_image.read()).decode('utf-8') | |
| spectrum_html = f'<img src="data:image/png;base64,{spectrum_base64}" alt="Frequency Spectrum">' | |
| # Plot the spectrogram | |
| fig3 = plt.figure(figsize=(8, 4)) | |
| librosa.display.specshow( | |
| S_db, sr=sr, x_axis='time', y_axis='linear', hop_length=hop_length) | |
| plt.colorbar(format='%+2.0f dB') | |
| plt.title('Spectrogram', fontsize=14) | |
| plt.xlabel('Time (s)', fontsize=12) | |
| plt.ylabel('Frequency (Hz)', fontsize=12) | |
| plt.tight_layout() | |
| spectrogram_image = io.BytesIO() | |
| plt.savefig(spectrogram_image, format='png', bbox_inches='tight') | |
| plt.close(fig3) | |
| spectrogram_image.seek(0) | |
| spectrogram_base64 = base64.b64encode( | |
| spectrogram_image.read()).decode('utf-8') | |
| spectrogram_html = f'<img src="data:image/png;base64,{spectrogram_base64}" alt="Spectrogram">' | |
| # Analyze high-frequency content | |
| # Define a threshold relative to the maximum amplitude | |
| threshold_db = -80 # dB | |
| max_amplitude_db = 20 * np.log10(np.max(S_mean + 1e-10)) | |
| threshold_amplitude_db = max_amplitude_db + threshold_db | |
| threshold_amplitude = 10 ** (threshold_amplitude_db / 20) | |
| # Find the highest frequency with significant content | |
| significant_indices = np.where(S_mean >= threshold_amplitude)[0] | |
| if len(significant_indices) > 0: | |
| highest_freq = freqs[significant_indices[-1]] | |
| # Estimate the real sample rate | |
| estimated_sample_rate = highest_freq * 2 # Nyquist theorem | |
| significant_freq_text = f"{highest_freq:.2f} Hz" | |
| estimated_sample_rate_text = f"{estimated_sample_rate / 1000:.2f} kHz" | |
| else: | |
| significant_freq_text = "No significant frequency content detected." | |
| estimated_sample_rate_text = "N/A" | |
| # Estimate effective bit depth | |
| # Calculate the signal's dynamic range | |
| signal_rms = np.sqrt(np.mean(y ** 2)) | |
| noise_floor = np.percentile(np.abs(y), 0.1) | |
| # Avoid division by zero | |
| dynamic_range_db = 20 * \ | |
| np.log10(signal_rms / (noise_floor + 1e-10)) | |
| estimated_bit_depth = int(np.ceil(dynamic_range_db / 6.02)) | |
| # Prepare the output text as an HTML table | |
| output_text = f""" | |
| <h3 style="font-size:22px;">{os.path.basename(audio_file)}</h3> | |
| <table style="font-size:18px;"> | |
| <tr><td><strong>File Bit Depth:</strong></td><td>{bit_depth_info}</td></tr> | |
| <tr><td><strong>Sample Rate:</strong></td><td>{sr} Hz</td></tr> | |
| <tr><td><strong>Duration:</strong></td><td>{duration:.2f} seconds</td></tr> | |
| <tr><td><strong>Using n_fft =</strong></td><td>{n_fft}</td></tr> | |
| <tr><td><strong>Significant frequency content up to:</strong></td><td>{significant_freq_text}</td></tr> | |
| <tr><td><strong>Estimated Real Sample Rate:</strong></td><td>{estimated_sample_rate_text}</td></tr> | |
| <tr><td><strong>Estimated Dynamic Range:</strong></td><td>{dynamic_range_db:.2f} dB</td></tr> | |
| <tr><td><strong>Estimated Effective Bit Depth:</strong></td><td>{estimated_bit_depth} bits PCM</td></tr> | |
| </table> | |
| """ | |
| # Plot histogram of sample values | |
| fig2 = plt.figure(figsize=(8, 4)) | |
| plt.hist(y, bins=1000, alpha=0.7, color='blue', | |
| edgecolor='black', log=True) | |
| plt.xlabel('Amplitude', fontsize=12) | |
| plt.ylabel('Count (log scale)', fontsize=12) | |
| plt.title('Histogram of Sample Amplitudes', fontsize=14) | |
| plt.grid(True) | |
| plt.tight_layout() | |
| histogram_image = io.BytesIO() | |
| plt.savefig(histogram_image, format='png', bbox_inches='tight') | |
| plt.close(fig2) | |
| histogram_image.seek(0) | |
| histogram_base64 = base64.b64encode( | |
| histogram_image.read()).decode('utf-8') | |
| histogram_html = f'<img src="data:image/png;base64,{histogram_base64}" alt="Histogram of Sample Amplitudes">' | |
| # Combine text and images into HTML | |
| output_html += f""" | |
| {output_text} | |
| <h4 style="font-size:20px;">Waveform</h4> | |
| {waveform_html} | |
| <h4 style="font-size:20px;">Spectral Features</h4> | |
| {spectral_features_html} | |
| <h4 style="font-size:20px;">Frequency Spectrum</h4> | |
| {spectrum_html} | |
| <h4 style="font-size:20px;">Spectrogram</h4> | |
| {spectrogram_html} | |
| <h4 style="font-size:20px;">Histogram of Sample Amplitudes</h4> | |
| {histogram_html} | |
| <hr> | |
| """ | |
| except Exception as e: | |
| # Handle errors gracefully | |
| output_html += f"<p><strong>File:</strong> {os.path.basename(audio_file)}</p><p><strong>Error:</strong> {str(e)}</p><hr>" | |
| # Return the aggregated HTML output | |
| return output_html | |
| with gr.Blocks() as demo: | |
| gr.Markdown("Wave Wizard") | |
| gr.Markdown( | |
| "Upload one or more audio files, or specify a folder containing audio files.") | |
| with gr.Row(): | |
| file_input = gr.Files(label="Upload Audio Files", | |
| type="filepath", file_count="multiple") | |
| folder_input = gr.Textbox(label="Folder Path (optional)", | |
| placeholder="Enter folder path containing audio files") | |
| analyze_button = gr.Button("Analyze") | |
| output_display = gr.HTML() | |
| def analyze_wrapper(files, folder_path): | |
| outputs = analyze_audio_files(files, folder_path) | |
| return outputs | |
| analyze_button.click(analyze_wrapper, inputs=[ | |
| file_input, folder_input], outputs=output_display) | |
| demo.launch() | |