Spaces:

sdbrgo
/

PERCEUL

Sleeping

sdbrgo committed on Dec 17, 2025

Commit

d440d65

verified ·

1 Parent(s): 2ed56a7

update

revised choose_k()

Files changed (1) hide show

cluster_utils.py CHANGED Viewed

@@ -10,19 +10,25 @@ __all__ = [
 ]
 #========== Before Final Clustering ==========
# Pre-commit ("before") side of the diff; lines prefixed with "-" were removed
# by this commit. This version fits KMeans (random_state=42) for every k in
# range(k_range[0], k_range[1]) -- upper bound exclusive -- scores each
# labelling with silhouette_score, and returns the k with the highest score.
# If no k beats the initial best_score of -1, it returns the default of 2.
-def choose_k(X_pca, k_range=(2, 12)):
     best_k = 2
     best_score = -1
-    for k in range(k_range[0], k_range[1]):
-        km = KMeans(n_clusters=k, random_state=42)
         labels = km.fit_predict(X_pca)
         score = silhouette_score(X_pca, labels)
         if score > best_score:
             best_score = score
             best_k = k
     return best_k
 #========== During Cluster Analysis ==========

 ]
 #========== Before Final Clustering ==========
def choose_k(X_pca, k_range=(2, 12)) -> int:
    """Return the cluster count with the highest silhouette score.

    KMeans (``random_state=42``, ``n_init='auto'``) is fitted once for every
    candidate ``k`` and the resulting labelling is scored with
    ``silhouette_score``; the best-scoring ``k`` wins.

    Args:
        X_pca: Sample matrix to cluster. Must expose ``.shape`` with the
            number of samples in ``shape[0]`` (e.g. a NumPy array).
        k_range: ``(start, stop)`` bounds of the candidate range, with
            ``stop`` exclusive as in ``range()``. Defaults to ``(2, 12)``,
            i.e. k = 2..11. ``start`` is clamped to at least 2 and ``stop``
            to at most ``n_samples``, because the silhouette score is only
            defined for ``2 <= n_labels <= n_samples - 1``.

    Returns:
        The best ``k`` found. Falls back to 2 when no candidate can be
        scored (fewer than 3 samples, an empty range, or every fit
        collapsing to a single cluster).
    """
    best_k = 2
    best_score = -1
    n_samples = X_pca.shape[0]

    # range() excludes its stop value, so stopping at n_samples lets k reach
    # at most n_samples - 1, the largest label count silhouette_score accepts.
    k_start = max(2, k_range[0])
    k_stop = min(k_range[1], n_samples)

    for k in range(k_start, k_stop):
        km = KMeans(n_clusters=k, random_state=42, n_init='auto')  # explicit n_init avoids the FutureWarning
        labels = km.fit_predict(X_pca)
        # With duplicate points KMeans can return fewer distinct clusters
        # than requested; silhouette_score raises ValueError on fewer than
        # two labels, so skip such candidates instead of crashing.
        if len(set(labels)) < 2:
            continue
        score = silhouette_score(X_pca, labels)
        if score > best_score:
            best_score = score
            best_k = k

    print(f"Executing choose_k()... Best Score: {best_score}")
    return best_k
 #========== During Cluster Analysis ==========