# ---
# jupyter:
#   jupytext:
#     text_representation:
#       extension: .py
#       format_name: percent
#       format_version: '1.3'
#     jupytext_version: 1.16.1
#   kernelspec:
#     display_name: Python 3 (ipykernel)
#     language: python
#     name: python3
# ---
# %%
import os

import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
from IPython.display import Audio, display
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
# %%
# Load the entire audio file, resampled to a fixed 44.1 kHz rate so all
# later sample-count arithmetic uses one known sample rate.
cwd = os.getcwd()
relative_path = "data/soundscape_data/PER_001_S01_20190116_100007Z.flac"
file_path = os.path.join(cwd, relative_path)
y, sr = librosa.load(file_path, sr=44100)
# %%
# Split the sound file into non-overlapping 10 s chunks.
window_size = 10  # window size in seconds
hop_size = 10  # hop size in seconds; equal to window_size -> no overlap

# Convert window and hop size to samples
window_samples = int(window_size * sr)
hop_samples = int(hop_size * sr)

# Total number of full windows that fit in the signal.
# max(..., 0) guards against a recording shorter than one window, where the
# floor division would otherwise produce a negative count.
num_windows = max((len(y) - window_samples) // hop_samples + 1, 0)
print(f"Total number of windows: {num_windows}")
# %%
# Define frequency bands (in Hz) used as coarse spectral features.
bands = {
    'Sub-bass': (20, 60),
    'Bass': (60, 250),
    'Low Midrange': (250, 500),
    'Midrange': (500, 2000),
    'Upper Midrange': (2000, 4000),
    'Presence': (4000, 6000),
    'Brilliance': (6000, 20000)
}

# Initialize a list to hold the per-window feature vectors
all_features = []

for i in range(num_windows):
    start_sample = i * hop_samples
    end_sample = start_sample + window_samples
    y_window = y[start_sample:end_sample]

    # Compute STFT (default n_fft=2048 -> S has n_fft//2 + 1 frequency rows)
    S = librosa.stft(y_window)
    S_db = librosa.amplitude_to_db(np.abs(S))

    # The frequency of STFT row k is k * sr / n_fft, so a frequency f maps
    # to bin f * n_fft / sr. Recover n_fft from the row count.
    # (The previous mapping f * S.shape[0] / sr used the row COUNT instead
    # of n_fft and therefore placed every band edge at roughly half the
    # correct bin index.)
    n_fft = 2 * (S.shape[0] - 1)

    # Mean dB energy per band, one value per STFT frame
    features = []
    for band, (low_freq, high_freq) in bands.items():
        low_bin = int(np.floor(low_freq * n_fft / sr))
        # Clip to the available rows so bands near Nyquist stay in range.
        high_bin = min(int(np.ceil(high_freq * n_fft / sr)), S.shape[0])
        band_energy = np.mean(S_db[low_bin:high_bin, :], axis=0)
        features.append(band_energy)

    # Flatten the per-band arrays into one feature vector for this window
    features_flat = np.concatenate(features)
    all_features.append(features_flat)

# Convert to numpy array of shape (num_windows, n_bands * n_frames)
all_features = np.array(all_features)
# %%
# Reduce dimensionality with PCA to 2-D for clustering and plotting.
pca = PCA(n_components=2)
features_reduced = pca.fit_transform(all_features)

# Perform k-means clustering. random_state pins the otherwise-random
# centroid initialisation so reruns give the same labels; n_init is set
# explicitly because its default changed across scikit-learn versions.
kmeans = KMeans(n_clusters=5, n_init=10, random_state=42)  # Example: 5 clusters
clusters = kmeans.fit_predict(features_reduced)

# Plot the clusters
plt.figure(figsize=(10, 6))
scatter = plt.scatter(features_reduced[:, 0], features_reduced[:, 1], c=clusters, cmap='viridis')
plt.title('Clustered Frequency Band Features')
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.colorbar(scatter, label='Cluster')
plt.show()
# %%
# Play the audio for a representative sample from each cluster.
# NOTE(review): assumes clusters[i] labels the i-th analysis window of `y`,
# which holds because the windows were generated in index order above.
for cluster_label in np.unique(clusters):
    # Find the first data point in the cluster
    representative_index = np.where(clusters == cluster_label)[0][0]
    # Use the original audio window at the representative index
    start_sample = representative_index * hop_samples
    end_sample = start_sample + window_samples
    y_representative = y[start_sample:end_sample]
    print(f"Cluster {cluster_label} representative audio:")
    display(Audio(data=y_representative, rate=sr))
# %% [markdown]
# ## Pipeline for all the files
# %%
import os
import numpy as np
import librosa
from sklearn.preprocessing import StandardScaler
import joblib

# Directory containing the audio files
audio_dir = "data/soundscape_data"

# Parameters for windowing
window_size = 10  # window size in seconds
hop_size = 10  # hop size in seconds

# Define frequency bands (in Hz)
bands = {
    'Sub-bass': (20, 60),
    'Bass': (60, 250),
    'Low Midrange': (250, 500),
    'Midrange': (500, 2000),
    'Upper Midrange': (2000, 4000),
    'Presence': (4000, 6000),
    'Brilliance': (6000, 20000)
}

# Directory to save features
features_dir = "features"
os.makedirs(features_dir, exist_ok=True)

# Iterate over each audio file in the directory
for filename in os.listdir(audio_dir):
    if filename.endswith(".flac"):
        file_path = os.path.join(audio_dir, filename)
        y, sr = librosa.load(file_path, sr=44100)

        # Convert window and hop size to samples
        window_samples = int(window_size * sr)
        hop_samples = int(hop_size * sr)

        # Total number of windows in the current file; max(..., 0) guards
        # files shorter than one window, where the floor division would
        # otherwise go negative.
        num_windows = max((len(y) - window_samples) // hop_samples + 1, 0)

        all_features = []
        for i in range(num_windows):
            start_sample = i * hop_samples
            end_sample = start_sample + window_samples
            y_window = y[start_sample:end_sample]

            # Compute STFT
            S = librosa.stft(y_window)
            S_db = librosa.amplitude_to_db(np.abs(S))

            # The frequency of STFT row k is k * sr / n_fft, so a frequency
            # f maps to bin f * n_fft / sr. (The previous mapping used the
            # row count S.shape[0] instead of n_fft, halving every band
            # edge.) Kept consistent with the single-file cell above.
            n_fft = 2 * (S.shape[0] - 1)

            # Mean dB energy per band, one value per STFT frame
            features = []
            for band, (low_freq, high_freq) in bands.items():
                low_bin = int(np.floor(low_freq * n_fft / sr))
                high_bin = min(int(np.ceil(high_freq * n_fft / sr)), S.shape[0])
                band_energy = np.mean(S_db[low_bin:high_bin, :], axis=0)
                features.append(band_energy)

            # Flatten the per-band arrays into one vector per window
            features_flat = np.concatenate(features)
            all_features.append(features_flat)

        # Convert to numpy array
        all_features = np.array(all_features)

        # Standardize features per file.
        # NOTE(review): fitting a separate StandardScaler per file means the
        # files are not on a common scale when later concatenated for
        # clustering -- confirm this is intended.
        scaler = StandardScaler()
        all_features = scaler.fit_transform(all_features)

        # Save features to disk. The file is a joblib pickle despite the
        # ".npy" suffix; the loading cells below expect exactly this name
        # and format, so the suffix is kept for compatibility.
        feature_file = os.path.join(features_dir, f"{os.path.splitext(filename)[0]}_features.npy")
        joblib.dump((all_features, scaler), feature_file)
# %%
import numpy as np
import joblib
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt

# Directory to load features
features_dir = "features"

# Load all per-file feature matrices.
# NOTE(review): os.listdir order is arbitrary; the playback cell below
# iterates the directory again and assumes the SAME order to align audio
# segments with these rows -- verify, or sort the listing in both places.
all_features = []
for feature_file in os.listdir(features_dir):
    if feature_file.endswith("_features.npy"):
        features, _ = joblib.load(os.path.join(features_dir, feature_file))
        all_features.append(features)

# Combine all features into a single (n_windows_total, n_features) array
all_features = np.vstack(all_features)

# Perform PCA for 2D visualization only; clustering itself runs on the
# full feature vectors, not the 2-D projection.
pca = PCA(n_components=2)
features_pca = pca.fit_transform(all_features)

# Perform k-means clustering. random_state pins the otherwise-random
# centroid initialisation so reruns give the same labels; n_init is set
# explicitly because its default changed across scikit-learn versions.
kmeans = KMeans(n_clusters=5, n_init=10, random_state=42)  # Example: 5 clusters
clusters = kmeans.fit_predict(all_features)

# Plot the PCA-reduced features with cluster labels
plt.figure(figsize=(10, 6))
scatter = plt.scatter(features_pca[:, 0], features_pca[:, 1], c=clusters, cmap='viridis')
plt.title('PCA of Clustered Frequency Band Features')
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.colorbar(scatter, label='Cluster')
plt.show()

# Save clustering results for the playback cell
clustering_results = {
    'clusters': clusters,
    'kmeans': kmeans,
    'pca': pca
}
joblib.dump(clustering_results, 'clustering_results.pkl')

# Plot the mean feature vector of each cluster
plt.figure(figsize=(10, 6))
for i in range(5):
    plt.plot(all_features[clusters == i].mean(axis=0), label=f'Cluster {i}')
plt.legend()
plt.title('Clustered Frequency Band Features')
plt.show()
# %%
import os
import numpy as np
import librosa
from IPython.display import Audio, display
import joblib

# Directory containing the audio files
audio_dir = "data/soundscape_data"
# Directory to load features
features_dir = "features"

# Parameters for windowing (must match the feature-extraction cell above)
window_size = 10  # window size in seconds
hop_size = 10  # hop size in seconds

# Load clustering results saved by the previous cell
clustering_results = joblib.load('clustering_results.pkl')
clusters = clustering_results['clusters']

# Rebuild the audio segments in the same order the features were created.
# NOTE(review): this relies on os.listdir returning files in the same order
# here as in the clustering cell -- listdir order is not guaranteed by the
# OS; verify, or sort the listing in both places.
all_features = []
audio_segments = []
for feature_file in os.listdir(features_dir):
    if feature_file.endswith("_features.npy"):
        features, scaler = joblib.load(os.path.join(features_dir, feature_file))
        # Map the feature file back to its source audio file by name
        filename = feature_file.replace('_features.npy', '.flac')
        file_path = os.path.join(audio_dir, filename)
        y, sr = librosa.load(file_path, sr=44100)
        # Convert window and hop size to samples
        window_samples = int(window_size * sr)
        hop_samples = int(hop_size * sr)
        num_windows = (len(y) - window_samples) // hop_samples + 1
        for i in range(num_windows):
            start_sample = i * hop_samples
            end_sample = start_sample + window_samples
            y_window = y[start_sample:end_sample]
            audio_segments.append(y_window)
        all_features.append(features)

# Flatten the list of all features; rows align with audio_segments
# by construction (same file order, same window order).
all_features = np.vstack(all_features)

# Play the audio for a representative sample from each cluster
for cluster_label in np.unique(clusters):
    try:
        # Find the first data point in the cluster
        representative_index = np.where(clusters == cluster_label)[0][0]
        # Use the original audio segment at the representative index
        y_representative = audio_segments[representative_index]
        # Check if y_representative is not empty
        if y_representative.size == 0:
            raise ValueError("The audio segment is empty")
        print(f"Cluster {cluster_label} representative audio:")
        display(Audio(data=y_representative, rate=sr))
    except Exception as e:
        # Best-effort playback: report and continue with the next cluster
        print(f"Could not play audio for cluster {cluster_label}: {e}")
# %%