surya5289 committed on
Commit
3aa4ef5
·
verified ·
1 Parent(s): 7ebf4a4

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +122 -0
app.py ADDED
@@ -0,0 +1,122 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import io
import tempfile

import cv2
import gradio as gr
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
from gradio_imageslider import ImageSlider
from PIL import Image
10
+
11
def generate_mel_spectrogram(audio_path, sr=22050, n_mels=128, fmin=0, fmax=7000):
    """Load an audio file and compute its mel spectrogram in decibels.

    Args:
        audio_path: Path of the audio file, handed to ``librosa.load``.
        sr: Sampling rate requested from ``librosa.load``.
        n_mels: Number of mel bands.
        fmin: Lowest frequency (Hz) of the mel filter bank.
        fmax: Highest frequency (Hz) of the mel filter bank.

    Returns:
        A tuple ``(S_dB, y, sr)``: the mel power spectrogram converted to dB
        with ``ref=np.max``, the loaded samples, and the sampling rate
        returned by ``librosa.load``.
    """
    samples, sample_rate = librosa.load(audio_path, sr=sr)

    mel_power = librosa.feature.melspectrogram(
        y=samples,
        sr=sample_rate,
        n_mels=n_mels,
        fmin=fmin,
        fmax=fmax,
    )
    mel_db = librosa.power_to_db(mel_power, ref=np.max)

    return mel_db, samples, sample_rate
20
+
21
def detect_zero_db(spectrogram, threshold=-10.0, atol=17.0):
    """Return a boolean mask of the bins whose level lies near ``threshold``.

    Despite the function's name, the default band is not centred on 0 dB:
    with ``threshold=-10`` and ``atol=17`` the mask selects every bin whose
    level is between roughly -27 dB and +7 dB.

    Args:
        spectrogram: Array-like of levels in dB.
        threshold: Centre of the accepted band, in dB.
        atol: Absolute half-width of the accepted band, in dB.

    Returns:
        A boolean array with the same shape as ``spectrogram``; ``True``
        marks bins inside the band.
    """
    # np.isclose also applies its default relative tolerance, which only
    # widens the band by a negligible amount at these magnitudes.
    return np.isclose(spectrogram, threshold, atol=atol)
27
+
28
def plot_spectrogram(spectrogram, file_path, sr=22050, fmin=0, fmax=7000):
    """Render a mel spectrogram without axes and save it as a PNG.

    Args:
        spectrogram: 2-D array passed to ``librosa.display.specshow``.
        file_path: Destination path of the PNG image.
        sr: Sampling rate used to scale the time axis.
        fmin: Lowest frequency (Hz) of the mel axis.
        fmax: Highest frequency (Hz) of the mel axis.

    The defaults mirror those of ``generate_mel_spectrogram``; pass the same
    values here when that function is called with non-default settings.
    """
    fig, ax = plt.subplots(figsize=(6, 6))
    try:
        ax.axis('off')
        librosa.display.specshow(
            spectrogram,
            sr=sr,
            x_axis='time',
            y_axis='mel',
            fmin=fmin,
            fmax=fmax,
            ax=ax,
        )
        fig.savefig(file_path, format='png', bbox_inches='tight', pad_inches=0)
    finally:
        # Always release the figure, even when drawing or saving fails;
        # otherwise it stays registered with pyplot for the process lifetime.
        plt.close(fig)
35
+
36
def plot_edge_spectrogram(edges, file_path):
    """Render a mask as a grayscale image without axes and save it as a PNG.

    Args:
        edges: 2-D array (for example the boolean mask from
            ``detect_zero_db``) shown with ``imshow``.
        file_path: Destination path of the PNG image.
    """
    fig, ax = plt.subplots(figsize=(6, 6))
    try:
        ax.axis('off')
        # origin='lower' puts row 0 at the bottom of the image.
        ax.imshow(edges, cmap='gray', aspect='auto', origin='lower')
        fig.savefig(file_path, format='png', bbox_inches='tight', pad_inches=0)
    finally:
        # Close the figure even if saving fails so it does not accumulate
        # in pyplot's global figure registry.
        plt.close(fig)
43
+
44
def plot_frequency(times, frequencies, label, color, file_path):
    """Plot one frequency track against time and save it as a PNG.

    Args:
        times: X values of the track, labelled as seconds.
        frequencies: Y values of the track, labelled as Hz.
        label: Name of the track; used for the legend and the title.
        color: Matplotlib colour of the line.
        file_path: Destination path of the PNG image.
    """
    fig, ax = plt.subplots(figsize=(12, 6))
    try:
        ax.plot(times, frequencies, label=label, color=color, linewidth=2)
        ax.set_title(f'{label} Frequency')
        ax.set_xlabel('Time (s)')
        ax.set_ylabel('Frequency (Hz)')
        ax.legend()

        fig.savefig(file_path, format='png', bbox_inches='tight', pad_inches=0)
    finally:
        # Close the figure even if plotting or saving fails so it does not
        # accumulate in pyplot's global figure registry.
        plt.close(fig)
55
+
56
def _reserve_png_path():
    """Create an empty temporary ``.png`` file and return its path."""
    # delete=False keeps the file after the handle is closed so it can be
    # returned to the UI; closing the handle before anything writes to the
    # path avoids having the same file open twice.
    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as handle:
        return handle.name


def _estimate_formants(y, sr, voiced_flag, frame_length, hop_length, lpc_order=5):
    """Estimate F1 and F2 for every voiced analysis frame via LPC pole angles.

    Args:
        y: Audio samples.
        sr: Sampling rate of ``y`` in Hz.
        voiced_flag: Per-frame boolean array; only ``True`` frames are analysed.
        frame_length: Number of samples analysed per frame.
        hop_length: Number of samples between consecutive frame centres.
        lpc_order: Order of the linear-prediction polynomial.

    Returns:
        Array of shape ``(len(voiced_flag), 2)`` holding F1 and F2 in Hz.
        Rows stay NaN for unvoiced frames and for frames where fewer than
        two resonances were found.
    """
    formants = np.full((len(voiced_flag), 2), np.nan)
    half_window = frame_length // 2

    for i in np.flatnonzero(voiced_flag):
        # Frame i is centred on sample i * hop_length, so take the window
        # around that sample (clipped at the start and end of the signal).
        centre = i * hop_length
        frame = y[max(0, centre - half_window):centre + half_window]
        if len(frame) <= lpc_order:
            continue  # too few samples to fit the predictor

        try:
            coefficients = librosa.lpc(frame, order=lpc_order)
        except FloatingPointError:
            continue  # ill-conditioned frame (e.g. digital silence)

        poles = np.roots(coefficients)
        # Keep one pole of each complex-conjugate pair. Real poles sit at
        # 0 Hz or at Nyquist and are not resonances, hence the strict ">".
        poles = poles[np.imag(poles) > 0]
        frequencies = np.sort(np.angle(poles) * (sr / (2 * np.pi)))

        if len(frequencies) >= 2:
            formants[i, 0] = frequencies[0]  # F1
            formants[i, 1] = frequencies[1]  # F2

    return formants


def process_audio(audio_file):
    """Analyse an audio file and write the result images to temporary files.

    Produces a mel spectrogram, the mask returned by ``detect_zero_db`` for
    it, and three plots: the pYIN fundamental frequency (F0) and the first
    two LPC formant estimates (F1, F2).

    Args:
        audio_file: Path of the audio file to analyse.

    Returns:
        ``([mel_png, mask_png], f0_png, f1_png, f2_png)`` where every item
        is the path of a PNG file. The files are created with
        ``delete=False`` and are not removed by this function.
    """
    # One framing shared by pYIN, the time axis and the LPC windows so that
    # frame i refers to the same stretch of audio everywhere.
    frame_length = 2048
    hop_length = frame_length // 4

    mel_spectrogram, y, sr = generate_mel_spectrogram(audio_file)
    edges = detect_zero_db(mel_spectrogram)

    mel_spectrogram_img = _reserve_png_path()
    edge_spectrogram_img = _reserve_png_path()
    f0_img = _reserve_png_path()
    f1_img = _reserve_png_path()
    f2_img = _reserve_png_path()

    # Mel spectrogram and its mask
    plot_spectrogram(mel_spectrogram, mel_spectrogram_img)
    plot_edge_spectrogram(edges, edge_spectrogram_img)

    # Fundamental frequency track
    f0, voiced_flag, _ = librosa.pyin(
        y,
        fmin=librosa.note_to_hz('C2'),
        fmax=librosa.note_to_hz('C7'),
        sr=sr,
        frame_length=frame_length,
        hop_length=hop_length,
    )
    times = librosa.times_like(f0, sr=sr, hop_length=hop_length)
    plot_frequency(times, f0, 'F0', 'cyan', f0_img)

    # Formant tracks (F1 and F2) from LPC
    formants = _estimate_formants(y, sr, voiced_flag, frame_length, hop_length)
    plot_frequency(times, formants[:, 0], 'F1', 'magenta', f1_img)
    plot_frequency(times, formants[:, 1], 'F2', 'yellow', f2_img)

    return [mel_spectrogram_img, edge_spectrogram_img], f0_img, f1_img, f2_img
110
+
111
# Gradio interface: one audio upload feeding an image slider (spectrogram
# versus mask) and three frequency plots.
with gr.Blocks() as demo:
    with gr.Group():
        audio_input = gr.Audio(
            label="Upload an audio file in WAV format",
            type="filepath",
        )
        img_slider = ImageSlider(
            label="Before and After Edge Detection",
            type="filepath",
            slider_color="pink",
        )
        f0_plot = gr.Image(label="F0 Frequency Plot", type="filepath")
        f1_plot = gr.Image(label="F1 Frequency Plot", type="filepath")
        f2_plot = gr.Image(label="F2 Frequency Plot", type="filepath")

    # The outputs are listed in the order process_audio returns its values.
    audio_input.upload(
        process_audio,
        inputs=audio_input,
        outputs=[img_slider, f0_plot, f1_plot, f2_plot],
    )

if __name__ == "__main__":
    demo.launch()