de-Rodrigo committed on
Commit
8951751
1 Parent(s): 48baa77

Delete Max Silhouette and Show Cluster Density

Browse files
Files changed (1) hide show
  1. app.py +56 -18
app.py CHANGED
@@ -617,36 +617,58 @@ def compute_global_regression(df_combined, embedding_cols, tsne_params, df_f1, r
617
 
618
  silhouette = np.max(silhouette_vals)
619
 
620
- # inertias = []
621
- # K = range(1, 20)
622
 
623
- # for k in K:
624
- # kmeans = KMeans(n_clusters=k, random_state=42)
625
- # kmeans.fit(reduced_real)
626
- # inertias.append(kmeans.inertia_)
627
 
628
 
629
- # kl = KneeLocator(K, inertias, curve="convex", direction="decreasing")
630
- # elbow_k = kl.elbow
631
 
632
 
633
- silhouettes_test = []
634
- K = range(2, 20)
635
 
636
- for k in K:
637
- kmeans = KMeans(n_clusters=k, random_state=42, n_init=10)
638
- labels = kmeans.fit_predict(X)
639
- sil = silhouette_score(X, labels)
640
- silhouettes_test.append(sil)
641
 
642
- inertias = silhouettes_test
643
- best_k = K[np.argmax(silhouettes_test)]
644
- elbow_k = best_k
645
 
646
  kmeans_opt = KMeans(n_clusters=elbow_k, random_state=42, n_init=10)
647
  labels_opt = kmeans_opt.fit_predict(X)
648
  silhouette_opt = silhouette_score(X, labels_opt)
649
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
650
 
651
  dfs_reduced, unique_subsets = split_versions(df_combined, reduced)
652
 
@@ -727,6 +749,9 @@ def compute_global_regression(df_combined, embedding_cols, tsne_params, df_f1, r
727
  "silhouette_opt": silhouette_opt,
728
  "elbow_k": elbow_k,
729
  "classes_k": labels_opt,
 
 
 
730
  }
731
 
732
  if reduction_method == "PCA":
@@ -888,6 +913,19 @@ def run_model(model_name):
888
  st.write(f"Silhouette Score: {result['silhouette_opt']:.2f}")
889
  st.write(f"Optimal number of clusters (k) from Elbow Method: {result['elbow_k']:.2f}")
890
 
 
 
 
 
 
 
 
 
 
 
 
 
 
891
  # # Mostrar los plots de loadings si se usó PCA (para el conjunto combinado)
892
  # if reduction_method == "PCA" and result.get("pca_model") is not None:
893
  # # pca_model = result["pca_model"]
 
617
 
618
  silhouette = np.max(silhouette_vals)
619
 
620
+ inertias = []
621
+ K = range(1, 20)
622
 
623
+ for k in K:
624
+ kmeans = KMeans(n_clusters=k, random_state=42)
625
+ kmeans.fit(reduced_real)
626
+ inertias.append(kmeans.inertia_)
627
 
628
 
629
+ kl = KneeLocator(K, inertias, curve="convex", direction="decreasing")
630
+ elbow_k = kl.elbow
631
 
632
 
633
+ # silhouettes_test = []
634
+ # K = range(2, 20)
635
 
636
+ # for k in K:
637
+ # kmeans = KMeans(n_clusters=k, random_state=42, n_init=10)
638
+ # labels = kmeans.fit_predict(X)
639
+ # sil = silhouette_score(X, labels)
640
+ # silhouettes_test.append(sil)
641
 
642
+ # inertias = silhouettes_test
643
+ # best_k = K[np.argmax(silhouettes_test)]
644
+ # elbow_k = best_k
645
 
646
  kmeans_opt = KMeans(n_clusters=elbow_k, random_state=42, n_init=10)
647
  labels_opt = kmeans_opt.fit_predict(X)
648
  silhouette_opt = silhouette_score(X, labels_opt)
649
 
650
+ centers = kmeans_opt.cluster_centers_
651
+ radii, densities, volumes = [], [], []
652
+
653
+ for i in range(elbow_k):
654
+ cluster_points = reduced_real[labels_opt == i]
655
+ n_points = len(cluster_points)
656
+
657
+ # Distancias euclídeas al centroide
658
+ dists = np.linalg.norm(cluster_points - centers[i], axis=1)
659
+
660
+ # Radio máximo (engloba todo el cluster)
661
+ r = dists.max()
662
+ radii.append(r)
663
+
664
+ # Volumen de la esfera
665
+ V = (4/3) * np.pi * (r ** 3)
666
+ volumes.append(V)
667
+
668
+ # Densidad = número de puntos / volumen
669
+ density = n_points / V if V > 0 else np.nan
670
+ densities.append(density)
671
+
672
 
673
  dfs_reduced, unique_subsets = split_versions(df_combined, reduced)
674
 
 
749
  "silhouette_opt": silhouette_opt,
750
  "elbow_k": elbow_k,
751
  "classes_k": labels_opt,
752
+ "centers_k": centers,
753
+ "radii_k": radii,
754
+ "densities_k": densities,
755
  }
756
 
757
  if reduction_method == "PCA":
 
913
  st.write(f"Silhouette Score: {result['silhouette_opt']:.2f}")
914
  st.write(f"Optimal number of clusters (k) from Elbow Method: {result['elbow_k']:.2f}")
915
 
916
+ if "radii_k" in result and "densities_k" in result:
917
+ st.subheader("Cluster Geometry (Radius & Density)")
918
+
919
+ df_clusters = pd.DataFrame({
920
+ "Cluster": np.arange(len(result["radii_k"])),
921
+ "Radius": np.round(result["radii_k"], 4),
922
+ "Density": np.round(result["densities_k"], 6)
923
+ })
924
+
925
+ # Mostrar tabla
926
+ st.dataframe(df_clusters, use_container_width=True)
927
+ # st.table(df_clusters)
928
+
929
  # # Mostrar los plots de loadings si se usó PCA (para el conjunto combinado)
930
  # if reduction_method == "PCA" and result.get("pca_model") is not None:
931
  # # pca_model = result["pca_model"]