Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import librosa | |
| import numpy as np | |
| import matplotlib.pyplot as plt | |
| import tempfile | |
| import librosa.display | |
def calculate_basic_metrics(y, sr):
    """Compute basic summary descriptors of an audio signal.

    Args:
        y: Audio samples as a floating-point NumPy array in the layout
            librosa expects (presumably mono, shape ``(n,)`` -- confirm
            against the caller).
        sr: Sampling rate of ``y`` in Hz.

    Returns:
        dict: JSON-friendly values keyed by ``'Average Pitch'``,
        ``'Number of MFCCs'``, ``'Energy'``, ``'Zero Crossing Rate'`` and
        ``'Spectral Centroid'``. ``'Average Pitch'`` is ``None`` when no
        positive pitch estimate was found.
    """
    pitches, _ = librosa.piptrack(y=y, sr=sr)
    # Only strictly positive entries are averaged; with none left, np.mean
    # would emit a RuntimeWarning and return NaN, so report None instead.
    detected = pitches[pitches > 0]
    average_pitch = float(np.mean(detected)) if detected.size else None

    # The MFCC matrix is laid out (..., n_mfcc, frames): the coefficient
    # count is the second-to-last axis, not the frame axis.
    mfccs = librosa.feature.mfcc(y=y, sr=sr)
    mfcc_count = int(mfccs.shape[-2])

    energy = float(np.sum(y ** 2))
    zero_crossing_rate = float(np.mean(librosa.feature.zero_crossing_rate(y)))
    spectral_centroid = float(
        np.mean(librosa.feature.spectral_centroid(y=y, sr=sr))
    )

    # Plain Python numbers keep the result serialisable by any JSON encoder.
    return {
        'Average Pitch': average_pitch,
        'Number of MFCCs': mfcc_count,
        'Energy': energy,
        'Zero Crossing Rate': zero_crossing_rate,
        'Spectral Centroid': spectral_centroid,
    }
def calculate_advanced_metrics(y, sr):
    """Compute F0, chroma and spectral-contrast averages for a signal.

    Args:
        y: Audio samples as a floating-point NumPy array in the layout
            librosa expects (presumably mono -- confirm against the caller).
        sr: Sampling rate of ``y`` in Hz.

    Returns:
        dict: JSON-friendly values keyed by ``'Average F0 (YIN)'``,
        ``'Average Chroma'`` and ``'Average Spectral Contrast'``.
        ``'Average F0 (YIN)'`` is ``None`` when no frame is voiced.
    """
    metrics = {}

    # pyin assumes sr=22050 unless told otherwise, so the real sampling rate
    # must be forwarded for the F0 values to be in the right units. fmax is
    # clamped to Nyquist so low sampling rates do not request an
    # unrepresentable search range.
    f0, _, _ = librosa.pyin(y, fmin=50, fmax=min(4000, sr / 2), sr=sr)

    # Unvoiced frames are NaN; average only the voiced ones and avoid the
    # "mean of empty slice" warning when nothing is voiced.
    voiced_f0 = f0[~np.isnan(f0)]
    metrics['Average F0 (YIN)'] = (
        float(np.mean(voiced_f0)) if voiced_f0.size else None
    )

    chroma = librosa.feature.chroma_stft(y=y, sr=sr)
    metrics['Average Chroma'] = float(np.mean(chroma))

    spectral_contrast = librosa.feature.spectral_contrast(y=y, sr=sr)
    metrics['Average Spectral Contrast'] = float(np.mean(spectral_contrast))

    return metrics
def generate_spectrogram(y, sr):
    """Render a log-frequency dB spectrogram of ``y`` to a temporary PNG.

    Args:
        y: Audio samples as a floating-point NumPy array accepted by
            ``librosa.stft``.
        sr: Sampling rate of ``y`` in Hz.

    Returns:
        str: Path of the PNG file. The file is created with
        ``delete=False``, so it persists after this function returns and
        the caller is responsible for removing it.
    """
    spectrogram_db = librosa.amplitude_to_db(
        np.abs(librosa.stft(y)), ref=np.max
    )

    # Use an explicit Figure/Axes pair rather than pyplot's implicit
    # "current figure" so the drawing does not depend on global state.
    fig, ax = plt.subplots(figsize=(10, 4))
    try:
        mesh = librosa.display.specshow(
            spectrogram_db, sr=sr, x_axis='time', y_axis='log', ax=ax
        )
        fig.colorbar(mesh, ax=ax, format='%+2.0f dB')
        ax.set_title('Spectrogram')
        fig.tight_layout()

        # Write through the open handle instead of re-opening the file by
        # name, which is not permitted on every platform while it is open.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.png') as tmp:
            fig.savefig(tmp, format='png')
            image_path = tmp.name
    finally:
        # Always release the figure, even if plotting or saving failed.
        plt.close(fig)

    return image_path
def _to_mono_float32(y):
    """Convert raw audio samples to a mono float32 array."""
    samples = np.asarray(y)
    if np.issubdtype(samples.dtype, np.integer):
        # Integer PCM: scale by the dtype's maximum, as the original did.
        samples = samples.astype(np.float32) / np.iinfo(samples.dtype).max
    else:
        # Floating-point input is presumably already normalised -- only the
        # precision is changed. (np.iinfo would raise on a float dtype.)
        samples = samples.astype(np.float32, copy=False)
    if samples.ndim > 1:
        # Assumes the (samples, channels) layout of Gradio's numpy audio;
        # librosa expects mono or channel-first data, so downmix here.
        samples = samples.mean(axis=-1)
    return samples


def process_audio(file):
    """Analyse an uploaded audio clip.

    Args:
        file: ``None`` when nothing was uploaded, otherwise a
            ``(sample_rate, samples)`` pair where ``samples`` is a NumPy
            array.

    Returns:
        tuple: ``(metrics, image_path)`` -- the merged basic and advanced
        metrics dict and the path of the rendered spectrogram image. When
        there is no audio to analyse, ``({}, "placeholder.png")``.
    """
    # NOTE(review): "placeholder.png" must exist next to the app for the
    # image output to render -- confirm.
    if file is None:
        return {}, "placeholder.png"

    sr, y = file
    y = _to_mono_float32(y)
    if y.size == 0:
        # Nothing to analyse; treat like a missing upload.
        return {}, "placeholder.png"

    basic_metrics = calculate_basic_metrics(y, sr)
    advanced_metrics = calculate_advanced_metrics(y, sr)
    metrics = {**basic_metrics, **advanced_metrics}
    image_path = generate_spectrogram(y, sr)
    return metrics, image_path
# Build the UI: one audio upload in, a JSON metrics panel and a spectrogram
# image out, both produced by process_audio.
audio_input = gr.Audio(label="Upload Audio")
iface = gr.Interface(
    process_audio,
    audio_input,
    ["json", "image"],
    title="Speech-Scope",
    description="Speech and audio Metrics Analysis",
)

# Start the app with debug output enabled.
iface.launch(debug=True)