de-Rodrigo committed on
Commit
48baa77
·
1 Parent(s): ee88baf

Elbow as Max Silhouette

Browse files
Files changed (1) hide show
  1. app.py +17 -9
app.py CHANGED
@@ -617,24 +617,32 @@ def compute_global_regression(df_combined, embedding_cols, tsne_params, df_f1, r
617
 
618
  silhouette = np.max(silhouette_vals)
619
 
620
- inertias = []
621
- silohuettes_test = []
622
- K = range(1, 20)
623
 
624
  # for k in K:
625
  # kmeans = KMeans(n_clusters=k, random_state=42)
626
  # kmeans.fit(reduced_real)
627
  # inertias.append(kmeans.inertia_)
 
 
628
  # kl = KneeLocator(K, inertias, curve="convex", direction="decreasing")
 
 
 
 
 
629
 
630
  for k in K:
631
- kmeans = KMeans(n_clusters=k, random_state=42)
632
- kmeans.fit(reduced_real)
633
- labels_opt = kmeans.fit_predict(X)
634
- silohuettes_test.append(silhouette_score(X, labels_opt))
 
 
 
 
635
 
636
- kl = KneeLocator(K, silohuettes_test, curve="convex", direction="decreasing")
637
- elbow_k = kl.elbow
638
  kmeans_opt = KMeans(n_clusters=elbow_k, random_state=42, n_init=10)
639
  labels_opt = kmeans_opt.fit_predict(X)
640
  silhouette_opt = silhouette_score(X, labels_opt)
 
617
 
618
  silhouette = np.max(silhouette_vals)
619
 
620
+ # inertias = []
621
+ # K = range(1, 20)
 
622
 
623
  # for k in K:
624
  # kmeans = KMeans(n_clusters=k, random_state=42)
625
  # kmeans.fit(reduced_real)
626
  # inertias.append(kmeans.inertia_)
627
+
628
+
629
  # kl = KneeLocator(K, inertias, curve="convex", direction="decreasing")
630
+ # elbow_k = kl.elbow
631
+
632
+
633
+ silhouettes_test = []
634
+ K = range(2, 20)
635
 
636
  for k in K:
637
+ kmeans = KMeans(n_clusters=k, random_state=42, n_init=10)
638
+ labels = kmeans.fit_predict(X)
639
+ sil = silhouette_score(X, labels)
640
+ silhouettes_test.append(sil)
641
+
642
+ inertias = silhouettes_test
643
+ best_k = K[np.argmax(silhouettes_test)]
644
+ elbow_k = best_k
645
 
 
 
646
  kmeans_opt = KMeans(n_clusters=elbow_k, random_state=42, n_init=10)
647
  labels_opt = kmeans_opt.fit_predict(X)
648
  silhouette_opt = silhouette_score(X, labels_opt)