janwinkler1 committed on
Commit
6ea8953
·
1 Parent(s): 744a358

first shot eda, with random data

Browse files
Files changed (1) hide show
  1. python/eda_jan.py +320 -0
python/eda_jan.py ADDED
@@ -0,0 +1,320 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# ---
# jupyter:
#   jupytext:
#     text_representation:
#       extension: .py
#       format_name: percent
#       format_version: '1.3'
#       jupytext_version: 1.16.1
#   kernelspec:
#     display_name: Python 3 (ipykernel)
#     language: python
#     name: python3
# ---
14
+
15
+ # %%
16
+ import os
17
+ import numpy as np
18
+ import librosa
19
+ import librosa.display
20
+ import matplotlib.pyplot as plt
21
+ from sklearn.cluster import KMeans
22
+ from sklearn.decomposition import PCA
23
+ from IPython.display import Audio, display
24
+
25
# %%
# Read one complete soundscape recording into memory.
relative_path = "data/soundscape_data/PER_001_S01_20190116_100007Z.flac"
cwd = os.getcwd()
file_path = os.path.join(cwd, relative_path)  # resolved against the current working directory

# sr=44100 asks librosa to return the signal at 44.1 kHz; the rate actually
# used comes back as `sr` and drives all later seconds-to-samples conversions.
y, sr = librosa.load(path=file_path, sr=44100)
31
+
32
# %%
# Cut the recording into consecutive 10-second windows.
window_size = 10  # seconds per window
hop_size = 10  # seconds between window starts; equal to window_size, so windows do not overlap

# Express both durations as sample counts at the loaded sample rate.
window_samples, hop_samples = (int(seconds * sr) for seconds in (window_size, hop_size))

# Count only the windows that fit entirely inside the signal; a trailing
# partial window is not counted.
usable_samples = len(y) - window_samples
num_windows = usable_samples // hop_samples + 1

print(f"Total number of windows: {num_windows}")
45
+
46
+
47
# %%
# Define frequency bands (in Hz)
bands = {
    'Sub-bass': (20, 60),
    'Bass': (60, 250),
    'Low Midrange': (250, 500),
    'Midrange': (500, 2000),
    'Upper Midrange': (2000, 4000),
    'Presence': (4000, 6000),
    'Brilliance': (6000, 20000)
}

# One flattened feature vector per window
all_features = []

for i in range(num_windows):
    start_sample = i * hop_samples
    end_sample = start_sample + window_samples
    y_window = y[start_sample:end_sample]

    # Compute the STFT and convert its magnitudes to decibels
    S = librosa.stft(y_window)
    S_db = librosa.amplitude_to_db(np.abs(S))

    # The STFT has 1 + n_fft/2 rows and row k is centred on k * sr / n_fft Hz,
    # so a frequency f lives in row f * n_fft / sr. Scaling by S.shape[0] / sr
    # instead (rows, not n_fft) would place every band at roughly half its
    # nominal frequency.
    n_fft = 2 * (S.shape[0] - 1)
    bins_per_hz = n_fft / sr

    # Compute features for each band
    features = []
    for band, (low_freq, high_freq) in bands.items():
        low_bin = int(np.floor(low_freq * bins_per_hz))
        high_bin = int(np.ceil(high_freq * bins_per_hz))
        # Mean dB level over the band's rows, one value per STFT frame
        band_energy = np.mean(S_db[low_bin:high_bin, :], axis=0)
        features.append(band_energy)

    # Concatenate the per-band frame series into one vector for this window
    features_flat = np.concatenate(features)
    all_features.append(features_flat)

# Convert to numpy array: one row per window
all_features = np.array(all_features)
85
+
86
+
87
# %%
# Project the window features onto their first two principal components
pca = PCA(n_components=2)
features_reduced = pca.fit_transform(all_features)

# Group the projected windows with k-means
kmeans = KMeans(n_clusters=5)  # Example: 5 clusters
clusters = kmeans.fit_predict(features_reduced)

# Scatter the 2-D projection, coloured by cluster label
fig, ax = plt.subplots(figsize=(10, 6))
scatter = ax.scatter(
    features_reduced[:, 0],
    features_reduced[:, 1],
    c=clusters,
    cmap='viridis',
)
ax.set_title('Clustered Frequency Band Features')
ax.set_xlabel('Principal Component 1')
ax.set_ylabel('Principal Component 2')
fig.colorbar(scatter, ax=ax, label='Cluster')
plt.show()
104
+
105
+
106
# %%
# Let the listener hear one example window per cluster
for cluster_label in np.unique(clusters):
    # Position of the earliest window that was assigned to this cluster
    representative_index = np.flatnonzero(clusters == cluster_label)[0]

    # Slice that same window back out of the original signal
    start_sample = representative_index * hop_samples
    end_sample = start_sample + window_samples
    y_representative = y[start_sample:end_sample]

    print(f"Cluster {cluster_label} representative audio:")
    display(Audio(data=y_representative, rate=sr))
119
+
120
+
121
+ # %% [markdown]
122
+ # ## pipeline for all the files
123
+
124
# %%
import os
import numpy as np
import librosa
from sklearn.preprocessing import StandardScaler
import joblib

# Directory containing the audio files
audio_dir = "data/soundscape_data"

# Parameters for windowing
window_size = 10  # window size in seconds
hop_size = 10  # hop size in seconds

# Define frequency bands (in Hz)
bands = {
    'Sub-bass': (20, 60),
    'Bass': (60, 250),
    'Low Midrange': (250, 500),
    'Midrange': (500, 2000),
    'Upper Midrange': (2000, 4000),
    'Presence': (4000, 6000),
    'Brilliance': (6000, 20000)
}

# Directory to save features
features_dir = "features"
os.makedirs(features_dir, exist_ok=True)

# Extract, standardize and save band features for every .flac file
for filename in os.listdir(audio_dir):
    if not filename.endswith(".flac"):
        continue

    file_path = os.path.join(audio_dir, filename)
    y, sr = librosa.load(file_path, sr=44100)

    # Convert window and hop size to samples
    window_samples = int(window_size * sr)
    hop_samples = int(hop_size * sr)

    # Total number of complete windows in the current file
    num_windows = (len(y) - window_samples) // hop_samples + 1
    if num_windows <= 0:
        # With no complete window there are no feature rows, and
        # StandardScaler cannot be fitted on an empty array.
        print(f"Skipping {filename}: shorter than one {window_size}s window")
        continue

    all_features = []

    for i in range(num_windows):
        start_sample = i * hop_samples
        end_sample = start_sample + window_samples
        y_window = y[start_sample:end_sample]

        # Compute the STFT and convert its magnitudes to decibels
        S = librosa.stft(y_window)
        S_db = librosa.amplitude_to_db(np.abs(S))

        # The STFT has 1 + n_fft/2 rows and row k is centred on
        # k * sr / n_fft Hz, so a frequency f lives in row f * n_fft / sr.
        # Scaling by S.shape[0] / sr instead would place every band at
        # roughly half its nominal frequency.
        n_fft = 2 * (S.shape[0] - 1)
        bins_per_hz = n_fft / sr

        # Compute features for each band
        features = []
        for band, (low_freq, high_freq) in bands.items():
            low_bin = int(np.floor(low_freq * bins_per_hz))
            high_bin = int(np.ceil(high_freq * bins_per_hz))
            # Mean dB level over the band's rows, one value per STFT frame
            band_energy = np.mean(S_db[low_bin:high_bin, :], axis=0)
            features.append(band_energy)

        # Concatenate the per-band frame series into one vector for this window
        features_flat = np.concatenate(features)
        all_features.append(features_flat)

    # Convert to numpy array: one row per window
    all_features = np.array(all_features)

    # Standardize features with a scaler fitted on this file only.
    # NOTE(review): per-file scaling removes level differences between
    # files before clustering - confirm that this is intended.
    scaler = StandardScaler()
    all_features = scaler.fit_transform(all_features)

    # Save features to disk. The file is a joblib pickle of
    # (features, scaler) despite its .npy suffix, so it must be read back
    # with joblib.load rather than np.load.
    feature_file = os.path.join(features_dir, f"{os.path.splitext(filename)[0]}_features.npy")
    joblib.dump((all_features, scaler), feature_file)
199
+
200
+
201
# %%
import os  # needed for os.listdir / os.path below; this cell previously relied on an earlier cell's import
import numpy as np
import joblib
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt

# Directory to load features
features_dir = "features"

# Load the standardized feature matrix of every file (the saved scaler is not needed here)
all_features = []
for feature_file in os.listdir(features_dir):
    if feature_file.endswith("_features.npy"):
        features, _ = joblib.load(os.path.join(features_dir, feature_file))
        all_features.append(features)

if not all_features:
    # np.vstack would fail with an opaque message on an empty list
    raise ValueError(f"No *_features.npy files found in {features_dir!r}")

# Combine all features into a single array: one row per window, files stacked
# in directory-listing order
all_features = np.vstack(all_features)

# Perform PCA for 2D visualization only; clustering below uses the full features
pca = PCA(n_components=2)
features_pca = pca.fit_transform(all_features)

# Perform k-means clustering
n_clusters = 5  # Example: 5 clusters
kmeans = KMeans(n_clusters=n_clusters)
clusters = kmeans.fit_predict(all_features)

# Plot the PCA-reduced features with cluster labels
plt.figure(figsize=(10, 6))
scatter = plt.scatter(features_pca[:, 0], features_pca[:, 1], c=clusters, cmap='viridis')
plt.title('PCA of Clustered Frequency Band Features')
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.colorbar(scatter, label='Cluster')
plt.show()

# Save clustering results
clustering_results = {
    'clusters': clusters,
    'kmeans': kmeans,
    'pca': pca
}
joblib.dump(clustering_results, 'clustering_results.pkl')

# Plot the mean feature vector of each cluster; the loop follows n_clusters
# so it stays correct if the cluster count is changed above
plt.figure(figsize=(10, 6))
for i in range(n_clusters):
    plt.plot(all_features[clusters == i].mean(axis=0), label=f'Cluster {i}')
plt.legend()
plt.title('Clustered Frequency Band Features')
plt.show()
253
+
254
# %%
import os
import numpy as np
import librosa
from IPython.display import Audio, display
import joblib

# Directory containing the audio files
audio_dir = "data/soundscape_data"
# Directory to load features
features_dir = "features"

# Parameters for windowing
window_size = 10  # window size in seconds
hop_size = 10  # hop size in seconds

# Load clustering results
clustering_results = joblib.load('clustering_results.pkl')
clusters = clustering_results['clusters']

# Rebuild the list of audio windows in the same row order as the cluster labels
all_features = []
audio_segments = []

# NOTE(review): cluster labels are matched to audio purely by position, so
# this assumes os.listdir returns the feature files in the same order as when
# the labels were computed - confirm, or persist the file order with the labels.
for feature_file in os.listdir(features_dir):
    if not feature_file.endswith("_features.npy"):
        continue

    features, scaler = joblib.load(os.path.join(features_dir, feature_file))
    filename = feature_file.replace('_features.npy', '.flac')
    file_path = os.path.join(audio_dir, filename)
    y, sr = librosa.load(file_path, sr=44100)

    # Convert window and hop size to samples
    window_samples = int(window_size * sr)
    hop_samples = int(hop_size * sr)

    num_windows = (len(y) - window_samples) // hop_samples + 1
    for i in range(num_windows):
        start_sample = i * hop_samples
        end_sample = start_sample + window_samples
        y_window = y[start_sample:end_sample]
        audio_segments.append(y_window)

    # One feature matrix per file (it already holds one row per window)
    all_features.append(features)

# Stack the per-file matrices into a single array
all_features = np.vstack(all_features)

# Positional matching only works if there is exactly one segment per label
if len(audio_segments) != len(clusters):
    print(
        f"Warning: {len(audio_segments)} audio segments but {len(clusters)} "
        "cluster labels; representative audio may not match its cluster"
    )

# Play the audio for a representative sample from each cluster
for cluster_label in np.unique(clusters):
    # Deliberately best-effort: a failure for one cluster is reported and the
    # remaining clusters are still played.
    try:
        # Find the first data point in the cluster
        representative_index = np.where(clusters == cluster_label)[0][0]

        # Use the original audio segment at the representative index
        y_representative = audio_segments[representative_index]

        # Check if y_representative is not empty
        if y_representative.size == 0:
            raise ValueError("The audio segment is empty")

        print(f"Cluster {cluster_label} representative audio:")
        display(Audio(data=y_representative, rate=sr))

    except Exception as e:
        print(f"Could not play audio for cluster {cluster_label}: {e}")
318
+
319
+
320
+ # %%