sdbrgo committed on
Commit
d440d65
·
verified ·
1 Parent(s): 2ed56a7

revised choose_k()

Files changed (1) hide show
  1. cluster_utils.py +11 -5
cluster_utils.py CHANGED
@@ -10,19 +10,25 @@ __all__ = [
10
  ]
11
 
12
  #========== Before Final Clustering ==========
13
- def choose_k(X_pca, k_range=(2, 12)):
14
  best_k = 2
15
  best_score = -1
16
 
17
- for k in range(k_range[0], k_range[1]):
18
- km = KMeans(n_clusters=k, random_state=42)
 
 
 
 
19
  labels = km.fit_predict(X_pca)
20
  score = silhouette_score(X_pca, labels)
21
-
22
  if score > best_score:
23
  best_score = score
24
  best_k = k
25
-
 
 
26
  return best_k
27
 
28
  #========== During Cluster Analysis ==========
 
10
  ]
11
 
12
  #========== Before Final Clustering ==========
13
+ def choose_k(X_pca):
14
  best_k = 2
15
  best_score = -1
16
 
17
+ # Ensure k does not exceed n_samples - 1 for silhouette_score validity
18
+ n_samples = X_pca.shape[0]
19
+ max_k_for_silhouette = n_samples # range is exclusive of end, so this will allow k up to n_samples - 1
20
+
21
+ for k in range(2, min(12, max_k_for_silhouette)):
22
+ km = KMeans(n_clusters=k, random_state=42, n_init='auto') # Added n_init='auto' to suppress future warning
23
  labels = km.fit_predict(X_pca)
24
  score = silhouette_score(X_pca, labels)
25
+
26
  if score > best_score:
27
  best_score = score
28
  best_k = k
29
+
30
+ print(f"Executing choose_k()... Best Score: {best_score}")
31
+
32
  return best_k
33
 
34
  #========== During Cluster Analysis ==========