# ---
# jupyter:
#   jupytext:
#     text_representation:
#       extension: .py
#       format_name: percent
#       format_version: '1.3'
#     jupytext_version: 1.16.1
#   kernelspec:
#     display_name: Python 3 (ipykernel)
#     language: python
#     name: python3
# ---
# %%
import os

import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
from IPython.display import Audio, display
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
# %%
# Load the entire audio file, resampled to a fixed 44.1 kHz rate so all
# later sample-count arithmetic uses one known sample rate.
cwd = os.getcwd()
relative_path = "data/soundscape_data/PER_001_S01_20190116_100007Z.flac"
file_path = os.path.join(cwd, relative_path)
y, sr = librosa.load(file_path, sr=44100)
# %%
# Split the sound file into non-overlapping 10 s chunks.
window_size = 10  # window size in seconds
hop_size = 10  # hop size in seconds; equal to window_size -> no overlap

# Convert window and hop size to samples
window_samples = int(window_size * sr)
hop_samples = int(hop_size * sr)

# Total number of full windows that fit in the signal.
# max(..., 0) guards against a recording shorter than one window, where the
# floor division would otherwise produce a negative count.
num_windows = max((len(y) - window_samples) // hop_samples + 1, 0)
print(f"Total number of windows: {num_windows}")
# %%
# Define frequency bands (in Hz) used as coarse spectral features.
bands = {
    'Sub-bass': (20, 60),
    'Bass': (60, 250),
    'Low Midrange': (250, 500),
    'Midrange': (500, 2000),
    'Upper Midrange': (2000, 4000),
    'Presence': (4000, 6000),
    'Brilliance': (6000, 20000)
}

# Initialize a list to hold the per-window feature vectors
all_features = []

for i in range(num_windows):
    start_sample = i * hop_samples
    end_sample = start_sample + window_samples
    y_window = y[start_sample:end_sample]

    # Compute STFT (default n_fft=2048 -> S has n_fft//2 + 1 frequency rows)
    S = librosa.stft(y_window)
    S_db = librosa.amplitude_to_db(np.abs(S))

    # The frequency of STFT row k is k * sr / n_fft, so a frequency f maps
    # to bin f * n_fft / sr. Recover n_fft from the row count.
    # (The previous mapping f * S.shape[0] / sr used the row COUNT instead
    # of n_fft and therefore placed every band edge at roughly half the
    # correct bin index.)
    n_fft = 2 * (S.shape[0] - 1)

    # Mean dB energy per band, one value per STFT frame
    features = []
    for band, (low_freq, high_freq) in bands.items():
        low_bin = int(np.floor(low_freq * n_fft / sr))
        # Clip to the available rows so bands near Nyquist stay in range.
        high_bin = min(int(np.ceil(high_freq * n_fft / sr)), S.shape[0])
        band_energy = np.mean(S_db[low_bin:high_bin, :], axis=0)
        features.append(band_energy)

    # Flatten the per-band arrays into one feature vector for this window
    features_flat = np.concatenate(features)
    all_features.append(features_flat)

# Convert to numpy array of shape (num_windows, n_bands * n_frames)
all_features = np.array(all_features)
# %%
# Reduce dimensionality with PCA to 2-D for clustering and plotting.
pca = PCA(n_components=2)
features_reduced = pca.fit_transform(all_features)

# Perform k-means clustering. random_state pins the otherwise-random
# centroid initialisation so reruns give the same labels; n_init is set
# explicitly because its default changed across scikit-learn versions.
kmeans = KMeans(n_clusters=5, n_init=10, random_state=42)  # Example: 5 clusters
clusters = kmeans.fit_predict(features_reduced)

# Plot the clusters
plt.figure(figsize=(10, 6))
scatter = plt.scatter(features_reduced[:, 0], features_reduced[:, 1], c=clusters, cmap='viridis')
plt.title('Clustered Frequency Band Features')
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.colorbar(scatter, label='Cluster')
plt.show()
# %%
# Play the audio for a representative sample from each cluster.
# NOTE(review): assumes clusters[i] labels the i-th analysis window of `y`,
# which holds because the windows were generated in index order above.
for cluster_label in np.unique(clusters):
    # Find the first data point in the cluster
    representative_index = np.where(clusters == cluster_label)[0][0]
    # Use the original audio window at the representative index
    start_sample = representative_index * hop_samples
    end_sample = start_sample + window_samples
    y_representative = y[start_sample:end_sample]
    print(f"Cluster {cluster_label} representative audio:")
    display(Audio(data=y_representative, rate=sr))
# %% [markdown]
# ## Pipeline for all the files
# %%
import os
import numpy as np
import librosa
from sklearn.preprocessing import StandardScaler
import joblib

# Directory containing the audio files
audio_dir = "data/soundscape_data"

# Parameters for windowing
window_size = 10  # window size in seconds
hop_size = 10  # hop size in seconds

# Define frequency bands (in Hz)
bands = {
    'Sub-bass': (20, 60),
    'Bass': (60, 250),
    'Low Midrange': (250, 500),
    'Midrange': (500, 2000),
    'Upper Midrange': (2000, 4000),
    'Presence': (4000, 6000),
    'Brilliance': (6000, 20000)
}

# Directory to save features
features_dir = "features"
os.makedirs(features_dir, exist_ok=True)

# Iterate over each audio file in the directory
for filename in os.listdir(audio_dir):
    if filename.endswith(".flac"):
        file_path = os.path.join(audio_dir, filename)
        y, sr = librosa.load(file_path, sr=44100)

        # Convert window and hop size to samples
        window_samples = int(window_size * sr)
        hop_samples = int(hop_size * sr)

        # Total number of windows in the current file; max(..., 0) guards
        # files shorter than one window, where the floor division would
        # otherwise go negative.
        num_windows = max((len(y) - window_samples) // hop_samples + 1, 0)

        all_features = []
        for i in range(num_windows):
            start_sample = i * hop_samples
            end_sample = start_sample + window_samples
            y_window = y[start_sample:end_sample]

            # Compute STFT
            S = librosa.stft(y_window)
            S_db = librosa.amplitude_to_db(np.abs(S))

            # The frequency of STFT row k is k * sr / n_fft, so a frequency
            # f maps to bin f * n_fft / sr. (The previous mapping used the
            # row count S.shape[0] instead of n_fft, halving every band
            # edge.) Kept consistent with the single-file cell above.
            n_fft = 2 * (S.shape[0] - 1)

            # Mean dB energy per band, one value per STFT frame
            features = []
            for band, (low_freq, high_freq) in bands.items():
                low_bin = int(np.floor(low_freq * n_fft / sr))
                high_bin = min(int(np.ceil(high_freq * n_fft / sr)), S.shape[0])
                band_energy = np.mean(S_db[low_bin:high_bin, :], axis=0)
                features.append(band_energy)

            # Flatten the per-band arrays into one vector per window
            features_flat = np.concatenate(features)
            all_features.append(features_flat)

        # Convert to numpy array
        all_features = np.array(all_features)

        # Standardize features per file.
        # NOTE(review): fitting a separate StandardScaler per file means the
        # files are not on a common scale when later concatenated for
        # clustering -- confirm this is intended.
        scaler = StandardScaler()
        all_features = scaler.fit_transform(all_features)

        # Save features to disk. The file is a joblib pickle despite the
        # ".npy" suffix; the loading cells below expect exactly this name
        # and format, so the suffix is kept for compatibility.
        feature_file = os.path.join(features_dir, f"{os.path.splitext(filename)[0]}_features.npy")
        joblib.dump((all_features, scaler), feature_file)
# %%
import numpy as np
import joblib
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt

# Directory to load features
features_dir = "features"

# Load all per-file feature matrices.
# NOTE(review): os.listdir order is arbitrary; the playback cell below
# iterates the directory again and assumes the SAME order to align audio
# segments with these rows -- verify, or sort the listing in both places.
all_features = []
for feature_file in os.listdir(features_dir):
    if feature_file.endswith("_features.npy"):
        features, _ = joblib.load(os.path.join(features_dir, feature_file))
        all_features.append(features)

# Combine all features into a single (n_windows_total, n_features) array
all_features = np.vstack(all_features)

# Perform PCA for 2D visualization only; clustering itself runs on the
# full feature vectors, not the 2-D projection.
pca = PCA(n_components=2)
features_pca = pca.fit_transform(all_features)

# Perform k-means clustering. random_state pins the otherwise-random
# centroid initialisation so reruns give the same labels; n_init is set
# explicitly because its default changed across scikit-learn versions.
kmeans = KMeans(n_clusters=5, n_init=10, random_state=42)  # Example: 5 clusters
clusters = kmeans.fit_predict(all_features)

# Plot the PCA-reduced features with cluster labels
plt.figure(figsize=(10, 6))
scatter = plt.scatter(features_pca[:, 0], features_pca[:, 1], c=clusters, cmap='viridis')
plt.title('PCA of Clustered Frequency Band Features')
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.colorbar(scatter, label='Cluster')
plt.show()

# Save clustering results for the playback cell
clustering_results = {
    'clusters': clusters,
    'kmeans': kmeans,
    'pca': pca
}
joblib.dump(clustering_results, 'clustering_results.pkl')

# Plot the mean feature vector of each cluster
plt.figure(figsize=(10, 6))
for i in range(5):
    plt.plot(all_features[clusters == i].mean(axis=0), label=f'Cluster {i}')
plt.legend()
plt.title('Clustered Frequency Band Features')
plt.show()
# %%
import os
import numpy as np
import librosa
from IPython.display import Audio, display
import joblib

# Directory containing the audio files
audio_dir = "data/soundscape_data"
# Directory to load features
features_dir = "features"

# Parameters for windowing (must match the feature-extraction cell above)
window_size = 10  # window size in seconds
hop_size = 10  # hop size in seconds

# Load clustering results saved by the previous cell
clustering_results = joblib.load('clustering_results.pkl')
clusters = clustering_results['clusters']

# Rebuild the audio segments in the same order the features were created.
# NOTE(review): this relies on os.listdir returning files in the same order
# here as in the clustering cell -- listdir order is not guaranteed by the
# OS; verify, or sort the listing in both places.
all_features = []
audio_segments = []
for feature_file in os.listdir(features_dir):
    if feature_file.endswith("_features.npy"):
        features, scaler = joblib.load(os.path.join(features_dir, feature_file))
        # Map the feature file back to its source audio file by name
        filename = feature_file.replace('_features.npy', '.flac')
        file_path = os.path.join(audio_dir, filename)
        y, sr = librosa.load(file_path, sr=44100)
        # Convert window and hop size to samples
        window_samples = int(window_size * sr)
        hop_samples = int(hop_size * sr)
        num_windows = (len(y) - window_samples) // hop_samples + 1
        for i in range(num_windows):
            start_sample = i * hop_samples
            end_sample = start_sample + window_samples
            y_window = y[start_sample:end_sample]
            audio_segments.append(y_window)
        all_features.append(features)

# Flatten the list of all features; rows align with audio_segments
# by construction (same file order, same window order).
all_features = np.vstack(all_features)

# Play the audio for a representative sample from each cluster
for cluster_label in np.unique(clusters):
    try:
        # Find the first data point in the cluster
        representative_index = np.where(clusters == cluster_label)[0][0]
        # Use the original audio segment at the representative index
        y_representative = audio_segments[representative_index]
        # Check if y_representative is not empty
        if y_representative.size == 0:
            raise ValueError("The audio segment is empty")
        print(f"Cluster {cluster_label} representative audio:")
        display(Audio(data=y_representative, rate=sr))
    except Exception as e:
        # Best-effort playback: report and continue with the next cluster
        print(f"Could not play audio for cluster {cluster_label}: {e}")
# %%