# Scikit-Learn-Homework / sklearn_clustering2.py
# ewdlop's picture
# Upload 5 files (#1)
# 9b316cc verified
from sklearn.datasets import make_blobs
# Synthetic data set: 150 two-feature samples generated around 3 centers
# (per-cluster standard deviation 0.5). The fixed random_state keeps every
# run of the script working on the same points.
X, y = make_blobs(
    n_samples=150,
    n_features=2,
    centers=3,
    cluster_std=0.5,
    shuffle=True,
    random_state=0,
)
import matplotlib.pyplot as plt
#plt.scatter(X[:,0], X[:,1], c='white', marker='o', edgecolors='black', s=50)
#plt.grid()
#plt.show()
from sklearn.cluster import KMeans
# Partition X into three clusters with k-means, starting from randomly
# chosen initial centroids. The fixed random_state makes the result
# reproducible between runs.
km = KMeans(
    n_clusters=3,
    init="random",
    n_init=10,
    max_iter=300,
    tol=1e-4,
    random_state=0,
)

# Fit the model and obtain the cluster index assigned to each sample.
y_km = km.fit_predict(X)
print(y_km)
#plt.scatter(X[y_km == 0, 0], X[y_km == 0, 1], s=50, c='lightgreen',marker='s', edgecolor='black',label='cluster 1')
#plt.scatter(X[y_km == 1, 0], X[y_km == 1, 1], s=50, c='orange',marker='o', edgecolor='black',label='cluster 2')
#plt.scatter(X[y_km == 2, 0], X[y_km == 2, 1], s=50, c='lightblue',marker='v', edgecolor='black',label='cluster 3')
#plt.scatter(km.cluster_centers_[:,0],km.cluster_centers_[:,1], s=250, marker='*', c='red', edgecolors='black', label = 'centroids')
#plt.legend(scatterpoints=1)
#plt.grid()
#plt.show()
# Elbow method: fit k-means for k = 1..10 and record each fitted model's
# inertia_ (scikit-learn's within-cluster sum of squared distances). Plotting
# inertia against k shows where adding more clusters stops paying off.
cluster_counts = range(1, 11)  # single source of truth for the loop and the x-axis
distortions = []
for i in cluster_counts:
    # The loop body must be indented under the `for`; without the indentation
    # the script fails with an IndentationError before anything runs.
    km = KMeans(n_clusters=i, init='k-means++', n_init=10, max_iter=300, random_state=0)
    km.fit(X)
    distortions.append(km.inertia_)

plt.plot(cluster_counts, distortions, marker='o')
plt.xlabel('Number of clusters')
plt.ylabel('Distortion')
plt.show()