# ---
# jupyter:
#   jupytext:
#     text_representation:
#       extension: .py
#       format_name: percent
#       format_version: '1.3'
#       jupytext_version: 1.16.1
#   kernelspec:
#     display_name: Python 3 (ipykernel)
#     language: python
#     name: python3
# ---

# %%
import os

import joblib
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
from IPython.display import Audio, display
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# %%
# Shared analysis parameters.
SR = 44100          # common resample rate for every file (Hz)
WINDOW_SIZE = 10    # analysis window length (seconds)
HOP_SIZE = 10       # hop between windows (seconds); equal to the window -> no overlap
N_CLUSTERS = 5      # example cluster count used throughout

# Perceptual frequency bands (Hz) used as coarse spectral features.
BANDS = {
    'Sub-bass': (20, 60),
    'Bass': (60, 250),
    'Low Midrange': (250, 500),
    'Midrange': (500, 2000),
    'Upper Midrange': (2000, 4000),
    'Presence': (4000, 6000),
    'Brilliance': (6000, 20000),
}


def iter_windows(y, sr, window_size=WINDOW_SIZE, hop_size=HOP_SIZE):
    """Yield consecutive complete windows of ``y``.

    Parameters
    ----------
    y : np.ndarray
        Mono audio signal.
    sr : int
        Sample rate of ``y``.
    window_size, hop_size : float
        Window length and hop, in seconds.

    Yields
    ------
    np.ndarray
        Slices of exactly ``window_size * sr`` samples; a trailing partial
        window is dropped (same count as the original
        ``(len(y) - window_samples) // hop_samples + 1`` formula).
    """
    window_samples = int(window_size * sr)
    hop_samples = int(hop_size * sr)
    num_windows = (len(y) - window_samples) // hop_samples + 1
    for i in range(num_windows):
        start = i * hop_samples
        yield y[start:start + window_samples]


def band_feature_vector(y_window, sr, bands=BANDS):
    """Return the per-frame mean dB energy of each band, flattened to 1-D.

    Bug fix: the original converted frequencies to bins with
    ``freq * S.shape[0] / sr``.  The STFT has ``n_fft // 2 + 1`` rows, so
    that factor mapped every band edge to roughly *half* its intended
    frequency.  The correct bins-per-Hz factor is ``n_fft / sr``.
    """
    S = librosa.stft(y_window)
    S_db = librosa.amplitude_to_db(np.abs(S))
    n_fft = 2 * (S.shape[0] - 1)  # librosa STFT rows == n_fft // 2 + 1
    feature_rows = []
    for low_freq, high_freq in bands.values():
        low_bin = int(np.floor(low_freq * n_fft / sr))
        # Clamp so a band edge at/above Nyquist stays inside the array.
        high_bin = min(int(np.ceil(high_freq * n_fft / sr)), S.shape[0])
        feature_rows.append(S_db[low_bin:high_bin, :].mean(axis=0))
    return np.concatenate(feature_rows)


def extract_file_features(y, sr):
    """Stack the band feature vector of every window of ``y`` into 2-D."""
    return np.array([band_feature_vector(w, sr) for w in iter_windows(y, sr)])


# %%
# Load a single file for the exploratory analysis below.
cwd = os.getcwd()
relative_path = "data/soundscape_data/PER_001_S01_20190116_100007Z.flac"
file_path = os.path.join(cwd, relative_path)
y, sr = librosa.load(file_path, sr=SR)

# %%
# Split soundfile into 10 s non-overlapping chunks.
window_samples = int(WINDOW_SIZE * sr)
hop_samples = int(HOP_SIZE * sr)
num_windows = (len(y) - window_samples) // hop_samples + 1
print(f"Total number of windows: {num_windows}")

# %%
# One row of band features per window.
all_features = extract_file_features(y, sr)

# %%
# Reduce dimensionality with PCA, then cluster the 2-D projection.
pca = PCA(n_components=2)
features_reduced = pca.fit_transform(all_features)

# n_init/random_state pinned so reruns give reproducible cluster labels.
kmeans = KMeans(n_clusters=N_CLUSTERS, n_init=10, random_state=0)
clusters = kmeans.fit_predict(features_reduced)

plt.figure(figsize=(10, 6))
scatter = plt.scatter(features_reduced[:, 0], features_reduced[:, 1],
                      c=clusters, cmap='viridis')
plt.title('Clustered Frequency Band Features')
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.colorbar(scatter, label='Cluster')
plt.show()

# %%
# Play the first window assigned to each cluster.
for cluster_label in np.unique(clusters):
    representative_index = np.where(clusters == cluster_label)[0][0]
    start_sample = representative_index * hop_samples
    y_representative = y[start_sample:start_sample + window_samples]
    print(f"Cluster {cluster_label} representative audio:")
    display(Audio(data=y_representative, rate=sr))

# %% [markdown]
# ## pipeline for all the files

# %%
audio_dir = "data/soundscape_data"
features_dir = "features"
os.makedirs(features_dir, exist_ok=True)

for filename in os.listdir(audio_dir):
    if not filename.endswith(".flac"):
        continue
    y, sr = librosa.load(os.path.join(audio_dir, filename), sr=SR)
    file_features = extract_file_features(y, sr)

    # NOTE(review): scaling per file means feature values are not directly
    # comparable across files; consider fitting one StandardScaler on the
    # combined matrix instead — confirm intended behaviour.
    scaler = StandardScaler()
    file_features = scaler.fit_transform(file_features)

    # joblib pickle, so use a .joblib extension (the data is NOT .npy format).
    feature_file = os.path.join(
        features_dir, f"{os.path.splitext(filename)[0]}_features.joblib")
    joblib.dump((file_features, scaler), feature_file)

# %%
# Load every per-file feature matrix and cluster them jointly.
all_features = []
for feature_file in os.listdir(features_dir):
    if feature_file.endswith("_features.joblib"):
        features, _ = joblib.load(os.path.join(features_dir, feature_file))
        all_features.append(features)
all_features = np.vstack(all_features)

# PCA is for 2-D visualisation only; clustering runs on the full features.
pca = PCA(n_components=2)
features_pca = pca.fit_transform(all_features)

kmeans = KMeans(n_clusters=N_CLUSTERS, n_init=10, random_state=0)
clusters = kmeans.fit_predict(all_features)

plt.figure(figsize=(10, 6))
scatter = plt.scatter(features_pca[:, 0], features_pca[:, 1],
                      c=clusters, cmap='viridis')
plt.title('PCA of Clustered Frequency Band Features')
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.colorbar(scatter, label='Cluster')
plt.show()

# Persist clustering results for the playback cell below.
joblib.dump({'clusters': clusters, 'kmeans': kmeans, 'pca': pca},
            'clustering_results.pkl')

# Mean feature profile of each cluster.
plt.figure(figsize=(10, 6))
for i in range(N_CLUSTERS):
    plt.plot(all_features[clusters == i].mean(axis=0), label=f'Cluster {i}')
plt.legend()
plt.title('Clustered Frequency Band Features')
plt.show()

# %%
# Rebuild the audio windows in the same os.listdir() order the features were
# stacked, then play one representative window per cluster.
clustering_results = joblib.load('clustering_results.pkl')
clusters = clustering_results['clusters']

audio_segments = []
for feature_file in os.listdir(features_dir):
    if feature_file.endswith("_features.joblib"):
        filename = feature_file.replace('_features.joblib', '.flac')
        y, sr = librosa.load(os.path.join(audio_dir, filename), sr=SR)
        audio_segments.extend(iter_windows(y, sr))

for cluster_label in np.unique(clusters):
    try:
        representative_index = np.where(clusters == cluster_label)[0][0]
        y_representative = audio_segments[representative_index]
        if y_representative.size == 0:
            raise ValueError("The audio segment is empty")
        print(f"Cluster {cluster_label} representative audio:")
        display(Audio(data=y_representative, rate=SR))
    except Exception as e:
        print(f"Could not play audio for cluster {cluster_label}: {e}")

# %%