Spaces:

surya5289
/

Spectral_Analysis

Sleeping

App Files Files Community

surya5289 committed on Aug 6, 2024

Commit

3aa4ef5

verified ·

1 Parent(s): 7ebf4a4

Create app.py

Browse files

Files changed (1) hide show

app.py +122 -0

app.py ADDED Viewed

	@@ -0,0 +1,122 @@

+import librosa
+import numpy as np
+import matplotlib.pyplot as plt
+import cv2
+import io
+import tempfile
+from PIL import Image
+import gradio as gr
+from gradio_imageslider import ImageSlider
+def generate_mel_spectrogram(audio_path, sr=22050, n_mels=128, fmin=0, fmax=7000):
+    # Load audio file
+    y, sr = librosa.load(audio_path, sr=sr)
+    # Generate Mel Spectrogram
+    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=n_mels, fmin=fmin, fmax=fmax)
+    S_dB = librosa.power_to_db(S, ref=np.max)
+    return S_dB, y, sr
+def detect_zero_db(spectrogram):
+    # Create a binary mask where the spectrogram values are close to 0 dB
+    threshold = -10  # +0 dB threshold
+    mask = np.isclose(spectrogram, threshold, atol=17)  # Use a tolerance to include values close to 0 dB
+    return mask
+def plot_spectrogram(spectrogram, file_path):
+    # Plot the Mel Spectrogram and save it to a file
+    plt.figure(figsize=(6, 6))
+    plt.axis('off')
+    librosa.display.specshow(spectrogram, sr=22050, x_axis='time', y_axis='mel', fmin=0, fmax=7000)
+    plt.savefig(file_path, format='png', bbox_inches='tight', pad_inches=0)
+    plt.close()
+def plot_edge_spectrogram(edges, file_path):
+    # Plot the Edge Detected Spectrogram and save it to a file
+    plt.figure(figsize=(6, 6))
+    plt.axis('off')
+    plt.imshow(edges, cmap='gray', aspect='auto', origin='lower')
+    plt.savefig(file_path, format='png', bbox_inches='tight', pad_inches=0)
+    plt.close()
+def plot_frequency(times, frequencies, label, color, file_path):
+    plt.figure(figsize=(12, 6))
+    plt.plot(times, frequencies, label=label, color=color, linewidth=2)
+    plt.title(f'{label} Frequency')
+    plt.xlabel('Time (s)')
+    plt.ylabel('Frequency (Hz)')
+    plt.legend()
+    # Save to file
+    plt.savefig(file_path, format='png', bbox_inches='tight', pad_inches=0)
+    plt.close()
+def process_audio(audio_file):
+    mel_spectrogram, y, sr = generate_mel_spectrogram(audio_file)
+    edges = detect_zero_db(mel_spectrogram)
+    # Create temporary files to save the generated images
+    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as mel_file, \
+        tempfile.NamedTemporaryFile(suffix=".png", delete=False) as edge_file, \
+        tempfile.NamedTemporaryFile(suffix=".png", delete=False) as f0_file, \
+        tempfile.NamedTemporaryFile(suffix=".png", delete=False) as f1_file, \
+        tempfile.NamedTemporaryFile(suffix=".png", delete=False) as f2_file:
+        mel_spectrogram_img = mel_file.name
+        edge_spectrogram_img = edge_file.name
+        f0_img = f0_file.name
+        f1_img = f1_file.name
+        f2_img = f2_file.name
+        # Save the Mel spectrogram and edge-detected spectrogram to the temporary files
+        plot_spectrogram(mel_spectrogram, mel_spectrogram_img)
+        plot_edge_spectrogram(edges, edge_spectrogram_img)
+        # Extract and save individual frequency plots
+        f0, voiced_flag, voiced_probs = librosa.pyin(y, fmin=librosa.note_to_hz('C2'), fmax=librosa.note_to_hz('C7'))
+        times = librosa.times_like(f0, sr=sr)
+        plot_frequency(times, f0, 'F0', 'cyan', f0_img)
+        # Formant frequency (F1 and F2) detection using LPC
+        lpc_order = 5 # LPC order for formant estimation
+        formants = np.empty((times.shape[0], 2))  # F1 and F2
+        formants[:] = np.nan  # Initialize with NaN for unvoiced frames
+        for i in range(len(times)):
+            if voiced_flag[i] and i * sr < len(y):
+                frame = y[int(i * sr):int(i * sr + sr)]  # 1 frame
+                if len(frame) == 0:
+                    continue
+                # Apply LPC
+                A = librosa.lpc(frame, order = lpc_order)
+                rts = np.roots(A)
+                rts = rts[np.imag(rts) >= 0]
+                angz = np.arctan2(np.imag(rts), np.real(rts))
+                frqs = angz * (sr / (2 * np.pi))
+                frqs = np.sort(frqs)
+                if len(frqs) >= 2:
+                    formants[i, 0] = frqs[0]  # F1
+                    formants[i, 1] = frqs[1]  # F2
+        plot_frequency(times, formants[:, 0], 'F1', 'magenta', f1_img)
+        plot_frequency(times, formants[:, 1], 'F2', 'yellow', f2_img)
+    return [mel_spectrogram_img, edge_spectrogram_img], f0_img, f1_img, f2_img
+# Build the Gradio interface: one audio input and four image outputs.
+with gr.Blocks() as demo:
+    with gr.Group():
+        # type="filepath" is requested so the handler is given a path on disk
+        # (presumably what process_audio passes on to librosa.load).
+        audio_input = gr.Audio(label="Upload an audio file in WAV format", type="filepath")
+        # Receives the first element returned by process_audio: the pair
+        # [Mel spectrogram image, mask image].
+        img_slider = ImageSlider(label="Before and After Edge Detection", type="filepath", slider_color="pink")
+        f0_plot = gr.Image(label="F0 Frequency Plot", type="filepath")
+        f1_plot = gr.Image(label="F1 Frequency Plot", type="filepath")
+        f2_plot = gr.Image(label="F2 Frequency Plot", type="filepath")
+        # Run process_audio on the upload event; its four return values map
+        # onto the outputs in the order listed.
+        audio_input.upload(process_audio, inputs=audio_input, outputs=[img_slider, f0_plot, f1_plot, f2_plot])
+# Start the app only when this file is executed as a script.
+if __name__ == "__main__":
+    demo.launch()