de-Rodrigo committed on
Commit
f651651
1 Parent(s): 45eff9e

Silhouette After Optimal Num of Clusters K-Means

Browse files
Files changed (1) hide show
  1. app.py +11 -0
app.py CHANGED
@@ -22,6 +22,7 @@ import matplotlib.pyplot as plt
22
  import matplotlib.colors as mcolors
23
  import zipfile
24
  import tempfile
 
25
 
26
 
27
  class RelativeScaler(BaseEstimator, TransformerMixin):
@@ -624,6 +625,13 @@ def compute_global_regression(df_combined, embedding_cols, tsne_params, df_f1, r
624
  inertias.append(kmeans.inertia_)
625
 
626
 
 
 
 
 
 
 
 
627
  dfs_reduced, unique_subsets = split_versions(df_combined, reduced)
628
 
629
  df_distances = compute_cluster_distances_synthetic_individual(
@@ -700,6 +708,7 @@ def compute_global_regression(df_combined, embedding_cols, tsne_params, df_f1, r
700
  "continuity": cont,
701
  "silhouette": silhouette,
702
  "inertias": inertias,
 
703
  }
704
 
705
  if reduction_method == "PCA":
@@ -858,6 +867,8 @@ def run_model(model_name):
858
 
859
  st.bokeh_chart(p, use_container_width=True)
860
 
 
 
861
  # # Mostrar los plots de loadings si se usó PCA (para el conjunto combinado)
862
  # if reduction_method == "PCA" and result.get("pca_model") is not None:
863
  # # pca_model = result["pca_model"]
 
22
  import matplotlib.colors as mcolors
23
  import zipfile
24
  import tempfile
25
+ from kneed import KneeLocator
26
 
27
 
28
  class RelativeScaler(BaseEstimator, TransformerMixin):
 
625
  inertias.append(kmeans.inertia_)
626
 
627
 
628
+ kl = KneeLocator(K, inertias, curve="convex", direction="decreasing")
629
+ elbow_k = kl.elbow
630
+ kmeans_opt = KMeans(n_clusters=elbow_k, random_state=42, n_init=10)
631
+ labels_opt = kmeans_opt.fit_predict(X)
632
+ silhouette_opt = silhouette_score(X, labels_opt)
633
+
634
+
635
  dfs_reduced, unique_subsets = split_versions(df_combined, reduced)
636
 
637
  df_distances = compute_cluster_distances_synthetic_individual(
 
708
  "continuity": cont,
709
  "silhouette": silhouette,
710
  "inertias": inertias,
711
+ "silhouette_opt": silhouette_opt,
712
  }
713
 
714
  if reduction_method == "PCA":
 
867
 
868
  st.bokeh_chart(p, use_container_width=True)
869
 
870
+ st.write(f"Silhouette Score: {result['silhouette_opt']:.2f}")
871
+
872
  # # Mostrar los plots de loadings si se usó PCA (para el conjunto combinado)
873
  # if reduction_method == "PCA" and result.get("pca_model") is not None:
874
  # # pca_model = result["pca_model"]