Spaces:

wwebec
/

empty-string

Build error

App Files Files Community

janwinkler1 committed on May 19, 2024

Commit

6ea8953

1 Parent(s): 744a358

first shot eda, with random data

Browse files

Files changed (1) hide show

python/eda_jan.py +320 -0

python/eda_jan.py ADDED Viewed

	@@ -0,0 +1,320 @@

+# ---
+# jupyter:
+#   jupytext:
+#     text_representation:
+#       extension: .py
+#       format_name: percent
+#       format_version: '1.3'
+#       jupytext_version: 1.16.1
+#   kernelspec:
+#     display_name: Python 3 (ipykernel)
+#     language: python
+#     name: python3
+# ---
+# %%
+import os
+import numpy as np
+import librosa
+import librosa.display
+import matplotlib.pyplot as plt
+from sklearn.cluster import KMeans
+from sklearn.decomposition import PCA
+from IPython.display import Audio, display
+# %%
+# Load the entire audio file
+cwd = os.getcwd()
+relative_path = "data/soundscape_data/PER_001_S01_20190116_100007Z.flac"
+file_path = os.path.join(cwd, relative_path)
+y, sr = librosa.load(file_path, sr=44100)
+# %%
+# split soundfile in to 10s chunks
+window_size = 10  # window size in seconds
+hop_size = 10     # hop size in seconds
+# Convert window and hop size to samples
+window_samples = int(window_size * sr)
+hop_samples = int(hop_size * sr)
+# Total number of windows
+num_windows = (len(y) - window_samples) // hop_samples + 1
+print(f"Total number of windows: {num_windows}")
+# %%
+# Define frequency bands (in Hz)
+bands = {
+    'Sub-bass': (20, 60),
+    'Bass': (60, 250),
+    'Low Midrange': (250, 500),
+    'Midrange': (500, 2000),
+    'Upper Midrange': (2000, 4000),
+    'Presence': (4000, 6000),
+    'Brilliance': (6000, 20000)
+}
+# Initialize a list to hold the features
+all_features = []
+for i in range(num_windows):
+    start_sample = i * hop_samples
+    end_sample = start_sample + window_samples
+    y_window = y[start_sample:end_sample]
+    # Compute STFT
+    S = librosa.stft(y_window)
+    S_db = librosa.amplitude_to_db(np.abs(S))
+    # Compute features for each band
+    features = []
+    for band, (low_freq, high_freq) in bands.items():
+        low_bin = int(np.floor(low_freq * (S.shape[0] / sr)))
+        high_bin = int(np.ceil(high_freq * (S.shape[0] / sr)))
+        band_energy = np.mean(S_db[low_bin:high_bin, :], axis=0)
+        features.append(band_energy)
+    # Flatten the feature array and add to all_features
+    features_flat = np.concatenate(features)
+    all_features.append(features_flat)
+# Convert to numpy array
+all_features = np.array(all_features)
+# %%
+# Reduce dimensionality with PCA
+pca = PCA(n_components=2)
+features_reduced = pca.fit_transform(all_features)
+# Perform k-means clustering
+kmeans = KMeans(n_clusters=5)  # Example: 5 clusters
+clusters = kmeans.fit_predict(features_reduced)
+# Plot the clusters
+plt.figure(figsize=(10, 6))
+scatter = plt.scatter(features_reduced[:, 0], features_reduced[:, 1], c=clusters, cmap='viridis')
+plt.title('Clustered Frequency Band Features')
+plt.xlabel('Principal Component 1')
+plt.ylabel('Principal Component 2')
+plt.colorbar(scatter, label='Cluster')
+plt.show()
+# %%
+# Play the audio for a representative sample from each cluster
+for cluster_label in np.unique(clusters):
+    # Find the first data point in the cluster
+    representative_index = np.where(clusters == cluster_label)[0][0]
+    # Use the original audio window at the representative index
+    start_sample = representative_index * hop_samples
+    end_sample = start_sample + window_samples
+    y_representative = y[start_sample:end_sample]
+    print(f"Cluster {cluster_label} representative audio:")
+    display(Audio(data=y_representative, rate=sr))
+# %% [markdown]
+# ## Pipeline for all the files
+# %%
+import os
+import numpy as np
+import librosa
+from sklearn.preprocessing import StandardScaler
+import joblib
+# Directory containing the audio files
+audio_dir = "data/soundscape_data"
+# Parameters for windowing
+window_size = 10  # window size in seconds
+hop_size = 10     # hop size in seconds
+# Define frequency bands (in Hz)
+bands = {
+    'Sub-bass': (20, 60),
+    'Bass': (60, 250),
+    'Low Midrange': (250, 500),
+    'Midrange': (500, 2000),
+    'Upper Midrange': (2000, 4000),
+    'Presence': (4000, 6000),
+    'Brilliance': (6000, 20000)
+}
+# Directory to save features
+features_dir = "features"
+os.makedirs(features_dir, exist_ok=True)
+# Iterate over each audio file in the directory
+for filename in os.listdir(audio_dir):
+    if filename.endswith(".flac"):
+        file_path = os.path.join(audio_dir, filename)
+        y, sr = librosa.load(file_path, sr=44100)
+        # Convert window and hop size to samples
+        window_samples = int(window_size * sr)
+        hop_samples = int(hop_size * sr)
+        # Total number of windows in the current file
+        num_windows = (len(y) - window_samples) // hop_samples + 1
+        all_features = []
+        for i in range(num_windows):
+            start_sample = i * hop_samples
+            end_sample = start_sample + window_samples
+            y_window = y[start_sample:end_sample]
+            # Compute STFT
+            S = librosa.stft(y_window)
+            S_db = librosa.amplitude_to_db(np.abs(S))
+            # Compute features for each band
+            features = []
+            for band, (low_freq, high_freq) in bands.items():
+                low_bin = int(np.floor(low_freq * (S.shape[0] / sr)))
+                high_bin = int(np.ceil(high_freq * (S.shape[0] / sr)))
+                band_energy = np.mean(S_db[low_bin:high_bin, :], axis=0)
+                features.append(band_energy)
+            # Flatten the feature array and add to all_features
+            features_flat = np.concatenate(features)
+            all_features.append(features_flat)
+        # Convert to numpy array
+        all_features = np.array(all_features)
+        # Standardize features
+        scaler = StandardScaler()
+        all_features = scaler.fit_transform(all_features)
+        # Save features to disk
+        feature_file = os.path.join(features_dir, f"{os.path.splitext(filename)[0]}_features.npy")
+        joblib.dump((all_features, scaler), feature_file)
+# %%
+import numpy as np
+import joblib
+from sklearn.cluster import KMeans
+from sklearn.decomposition import PCA
+import matplotlib.pyplot as plt
+# Directory to load features
+features_dir = "features"
+# Load all features
+all_features = []
+for feature_file in os.listdir(features_dir):
+    if feature_file.endswith("_features.npy"):
+        features, _ = joblib.load(os.path.join(features_dir, feature_file))
+        all_features.append(features)
+# Combine all features into a single array
+all_features = np.vstack(all_features)
+# Perform PCA for 2D visualization
+pca = PCA(n_components=2)
+features_pca = pca.fit_transform(all_features)
+# Perform k-means clustering
+kmeans = KMeans(n_clusters=5)  # Example: 5 clusters
+clusters = kmeans.fit_predict(all_features)
+# Plot the PCA-reduced features with cluster labels
+plt.figure(figsize=(10, 6))
+scatter = plt.scatter(features_pca[:, 0], features_pca[:, 1], c=clusters, cmap='viridis')
+plt.title('PCA of Clustered Frequency Band Features')
+plt.xlabel('Principal Component 1')
+plt.ylabel('Principal Component 2')
+plt.colorbar(scatter, label='Cluster')
+plt.show()
+# Save clustering results
+clustering_results = {
+    'clusters': clusters,
+    'kmeans': kmeans,
+    'pca': pca
+}
+joblib.dump(clustering_results, 'clustering_results.pkl')
+# Plot the clusters
+plt.figure(figsize=(10, 6))
+for i in range(5):
+    plt.plot(all_features[clusters == i].mean(axis=0), label=f'Cluster {i}')
+plt.legend()
+plt.title('Clustered Frequency Band Features')
+plt.show()
+# %%
+import os
+import numpy as np
+import librosa
+from IPython.display import Audio, display
+import joblib
+# Directory containing the audio files
+audio_dir = "data/soundscape_data"
+# Directory to load features
+features_dir = "features"
+# Parameters for windowing
+window_size = 10  # window size in seconds
+hop_size = 10     # hop size in seconds
+# Load clustering results
+clustering_results = joblib.load('clustering_results.pkl')
+clusters = clustering_results['clusters']
+# Load all features
+all_features = []
+audio_segments = []
+for feature_file in os.listdir(features_dir):
+    if feature_file.endswith("_features.npy"):
+        features, scaler = joblib.load(os.path.join(features_dir, feature_file))
+        filename = feature_file.replace('_features.npy', '.flac')
+        file_path = os.path.join(audio_dir, filename)
+        y, sr = librosa.load(file_path, sr=44100)
+        # Convert window and hop size to samples
+        window_samples = int(window_size * sr)
+        hop_samples = int(hop_size * sr)
+        num_windows = (len(y) - window_samples) // hop_samples + 1
+        for i in range(num_windows):
+            start_sample = i * hop_samples
+            end_sample = start_sample + window_samples
+            y_window = y[start_sample:end_sample]
+            audio_segments.append(y_window)
+        all_features.append(features)
+# Flatten the list of all features
+all_features = np.vstack(all_features)
+# Play the audio for a representative sample from each cluster
+for cluster_label in np.unique(clusters):
+    try:
+        # Find the first data point in the cluster
+        representative_index = np.where(clusters == cluster_label)[0][0]
+        # Use the original audio segment at the representative index
+        y_representative = audio_segments[representative_index]
+        # Check if y_representative is not empty
+        if y_representative.size == 0:
+            raise ValueError("The audio segment is empty")
+        print(f"Cluster {cluster_label} representative audio:")
+        display(Audio(data=y_representative, rate=sr))
+    except Exception as e:
+        print(f"Could not play audio for cluster {cluster_label}: {e}")
+# %%