| | from matplotlib import pyplot as plt |
| | from sklearn.cluster import KMeans |
| | from sklearn.metrics import silhouette_score |
| |
|
| |
|
def calculate_wcss(data, max_clusters=10):
    """Compute the within-cluster sum of squares for k = 1..max_clusters.

    One KMeans model is fitted per candidate cluster count and its
    ``inertia_`` is recorded. The resulting list is what the elbow method
    plots.

    Args:
        data: Samples to cluster, in any form accepted by ``KMeans.fit``.
        max_clusters: Largest cluster count to evaluate (inclusive).
            Defaults to 10, the upper bound that used to be hard-coded.

    Returns:
        A list of inertia values; element ``i`` belongs to ``i + 1``
        clusters. Empty when ``max_clusters`` is less than 1.
    """
    wcss = []
    for n_clusters in range(1, max_clusters + 1):
        # random_state is fixed so repeated runs produce the same curve.
        kmeans = KMeans(
            n_clusters=n_clusters,
            init='k-means++',
            max_iter=300,
            n_init=10,
            random_state=0,
        )
        kmeans.fit(data)
        wcss.append(kmeans.inertia_)
    return wcss
| |
|
def calculate_silhouette_scores(data, min_clusters=2, max_clusters=10):
    """Compute the silhouette score for each candidate cluster count.

    A KMeans model is fitted for every k in ``min_clusters..max_clusters``
    and the Euclidean silhouette score of its labelling is recorded.

    Args:
        data: Samples to cluster, in any form accepted by ``KMeans.fit``
            and ``silhouette_score``.
        min_clusters: Smallest cluster count to evaluate (inclusive).
            Defaults to 2, the lower bound that used to be hard-coded.
        max_clusters: Largest cluster count to evaluate (inclusive).
            Defaults to 10, the upper bound that used to be hard-coded.

    Returns:
        A list of scores; element ``i`` belongs to ``min_clusters + i``
        clusters.

    Raises:
        ValueError: If ``min_clusters`` is less than 2.
    """
    # The silhouette score is undefined for a single cluster, so reject the
    # request up front instead of failing part-way through the loop.
    if min_clusters < 2:
        raise ValueError("min_clusters must be at least 2 for silhouette scoring")

    scores = []
    for n_clusters in range(min_clusters, max_clusters + 1):
        kmeans = KMeans(
            n_clusters=n_clusters,
            init='k-means++',
            max_iter=300,
            n_init=10,
            random_state=0,
        )
        kmeans.fit(data)
        scores.append(silhouette_score(data, kmeans.labels_, metric='euclidean'))
    return scores
| |
|
def plot_elbow(wcss):
    """Plot WCSS against the number of clusters (elbow method).

    Args:
        wcss: Sequence of WCSS values where element ``i`` corresponds to
            ``i + 1`` clusters.

    The figure is displayed with ``plt.show()``; nothing is returned.
    """
    # Derive the x-axis from the data so a curve of any length plots
    # correctly; a fixed range(1, 11) only works for exactly ten values.
    cluster_counts = range(1, len(wcss) + 1)
    plt.plot(cluster_counts, wcss)
    plt.title('Elbow Method')
    plt.xlabel('Number of clusters')
    plt.ylabel('WCSS')
    plt.show()
| |
|
def get_optimal_clusters_silhouette(scores, min_clusters=2):
    """Return the cluster count with the highest silhouette score.

    Args:
        scores: Indexable sequence of silhouette scores where element ``i``
            belongs to ``min_clusters + i`` clusters.
        min_clusters: Cluster count that ``scores[0]`` corresponds to.
            Defaults to 2, the offset that used to be hard-coded.

    Returns:
        The cluster count whose score is largest; on ties the smallest
        such count wins. The result is also printed.

    Raises:
        ValueError: If ``scores`` is empty.
    """
    # len() rather than truthiness so array-like inputs are accepted too.
    if len(scores) == 0:
        raise ValueError("scores must contain at least one silhouette score")

    # max() keeps the first maximal element, so ties resolve to the lowest
    # index exactly as scores.index(max(scores)) would.
    best_index = max(range(len(scores)), key=lambda i: scores[i])
    optimal_clusters = best_index + min_clusters
    print(f"Optimal number of clusters: {optimal_clusters}")
    return optimal_clusters
| |
|
def fit_kmeans(data, n_clusters):
    """Fit the final KMeans model and label every row of *data*.

    Args:
        data: Feature table; assumed to be a pandas DataFrame, since a
            ``'cluster'`` column is assigned to it. It is modified in
            place: the ``'cluster'`` column is added or overwritten with
            the predicted labels.
        n_clusters: Number of clusters to fit.

    Returns:
        A ``(kmeans, data)`` tuple: the fitted estimator and the same
        *data* object, now carrying the ``'cluster'`` column.
    """
    # A 'cluster' column left by an earlier call holds labels, not a
    # feature; fitting on it would leak the previous result into this one.
    features = data.drop(columns='cluster', errors='ignore')

    # Same hyperparameters as the model-selection helpers, so the final
    # model matches what was evaluated. n_init is pinned because its
    # library default differs between scikit-learn versions.
    kmeans = KMeans(
        n_clusters=n_clusters,
        init='k-means++',
        max_iter=300,
        n_init=10,
        random_state=0,
    )
    data['cluster'] = kmeans.fit_predict(features)
    return kmeans, data