| """ | |
| Common utilities for testing clustering. | |
| """ | |
| import numpy as np | |
| ############################################################################### | |
| # Generate sample data | |
def generate_clustered_data(
    seed=0, n_clusters=3, n_features=2, n_samples_per_cluster=20, std=0.4
):
    """Generate Gaussian blobs around a fixed set of off-origin centers.

    Each cluster is drawn as ``center + std * N(0, 1)`` noise, where the
    centers come from a hard-coded table of 4 points in 4 dimensions
    (truncated to the first ``n_features`` coordinates) shifted by +10.

    Parameters
    ----------
    seed : int, default=0
        Seed passed to ``np.random.RandomState`` so the output is
        reproducible.
    n_clusters : int, default=3
        Number of clusters to generate; at most 4 (the number of rows in
        the center table).
    n_features : int, default=2
        Number of columns of the output; at most 4 (the number of
        coordinates of each center).
    n_samples_per_cluster : int, default=20
        Number of rows generated for every cluster.
    std : float, default=0.4
        Scale factor applied to the standard-normal noise.

    Returns
    -------
    X : ndarray of shape (n_clusters * n_samples_per_cluster, n_features)
        Samples stacked cluster after cluster, in center-table order.

    Raises
    ------
    IndexError
        If ``n_clusters`` exceeds the number of available centers.
    ValueError
        If ``n_features`` exceeds the dimensionality of the centers while
        at least one cluster is requested.
    """
    prng = np.random.RandomState(seed)

    # The data is voluntarily shifted away from zero to check clustering
    # algorithm robustness with regards to non centered data.
    means = (
        np.array(
            [
                [1, 1, 1, 0],
                [-1, -1, 0, 1],
                [1, -1, 1, 1],
                [-1, 1, 1, 0],
            ]
        )
        + 10
    )
    max_clusters, max_features = means.shape

    # Seeding the list with an empty block keeps the result a
    # (0, n_features) float array when no cluster is requested.
    blocks = [np.empty((0, n_features))]

    # Fail early with an explicit message instead of an opaque
    # broadcasting / indexing error from inside the loop.
    if n_clusters > 0 and n_features > max_features:
        raise ValueError(
            f"n_features={n_features} exceeds the {max_features} "
            "coordinates available for each cluster center"
        )
    if n_clusters > max_clusters:
        raise IndexError(
            f"n_clusters={n_clusters} exceeds the {max_clusters} "
            "available cluster centers"
        )

    # Draw the clusters one at a time, in order, so the sequence of random
    # numbers consumed from ``prng`` is the same for a given seed.
    for i in range(n_clusters):
        noise = std * prng.randn(n_samples_per_cluster, n_features)
        blocks.append(means[i][:n_features] + noise)

    # Single concatenation instead of growing the array inside the loop,
    # which would re-copy all previously generated rows at every step.
    return np.concatenate(blocks, axis=0)