Spaces:
Sleeping
Sleeping
Commit
·
8951751
1
Parent(s):
48baa77
Delete Max Silhouette and Show Cluster Density
Browse files
app.py
CHANGED
|
@@ -617,36 +617,58 @@ def compute_global_regression(df_combined, embedding_cols, tsne_params, df_f1, r
|
|
| 617 |
|
| 618 |
silhouette = np.max(silhouette_vals)
|
| 619 |
|
| 620 |
-
|
| 621 |
-
|
| 622 |
|
| 623 |
-
|
| 624 |
-
|
| 625 |
-
|
| 626 |
-
|
| 627 |
|
| 628 |
|
| 629 |
-
|
| 630 |
-
|
| 631 |
|
| 632 |
|
| 633 |
-
silhouettes_test = []
|
| 634 |
-
K = range(2, 20)
|
| 635 |
|
| 636 |
-
for k in K:
|
| 637 |
-
|
| 638 |
-
|
| 639 |
-
|
| 640 |
-
|
| 641 |
|
| 642 |
-
inertias = silhouettes_test
|
| 643 |
-
best_k = K[np.argmax(silhouettes_test)]
|
| 644 |
-
elbow_k = best_k
|
| 645 |
|
| 646 |
kmeans_opt = KMeans(n_clusters=elbow_k, random_state=42, n_init=10)
|
| 647 |
labels_opt = kmeans_opt.fit_predict(X)
|
| 648 |
silhouette_opt = silhouette_score(X, labels_opt)
|
| 649 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 650 |
|
| 651 |
dfs_reduced, unique_subsets = split_versions(df_combined, reduced)
|
| 652 |
|
|
@@ -727,6 +749,9 @@ def compute_global_regression(df_combined, embedding_cols, tsne_params, df_f1, r
|
|
| 727 |
"silhouette_opt": silhouette_opt,
|
| 728 |
"elbow_k": elbow_k,
|
| 729 |
"classes_k": labels_opt,
|
|
|
|
|
|
|
|
|
|
| 730 |
}
|
| 731 |
|
| 732 |
if reduction_method == "PCA":
|
|
@@ -888,6 +913,19 @@ def run_model(model_name):
|
|
| 888 |
st.write(f"Silhouette Score: {result['silhouette_opt']:.2f}")
|
| 889 |
st.write(f"Optimal number of clusters (k) from Elbow Method: {result['elbow_k']:.2f}")
|
| 890 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 891 |
# # Mostrar los plots de loadings si se usó PCA (para el conjunto combinado)
|
| 892 |
# if reduction_method == "PCA" and result.get("pca_model") is not None:
|
| 893 |
# # pca_model = result["pca_model"]
|
|
|
|
| 617 |
|
| 618 |
silhouette = np.max(silhouette_vals)
|
| 619 |
|
| 620 |
+
inertias = []
|
| 621 |
+
K = range(1, 20)
|
| 622 |
|
| 623 |
+
for k in K:
|
| 624 |
+
kmeans = KMeans(n_clusters=k, random_state=42)
|
| 625 |
+
kmeans.fit(reduced_real)
|
| 626 |
+
inertias.append(kmeans.inertia_)
|
| 627 |
|
| 628 |
|
| 629 |
+
kl = KneeLocator(K, inertias, curve="convex", direction="decreasing")
|
| 630 |
+
elbow_k = kl.elbow
|
| 631 |
|
| 632 |
|
| 633 |
+
# silhouettes_test = []
|
| 634 |
+
# K = range(2, 20)
|
| 635 |
|
| 636 |
+
# for k in K:
|
| 637 |
+
# kmeans = KMeans(n_clusters=k, random_state=42, n_init=10)
|
| 638 |
+
# labels = kmeans.fit_predict(X)
|
| 639 |
+
# sil = silhouette_score(X, labels)
|
| 640 |
+
# silhouettes_test.append(sil)
|
| 641 |
|
| 642 |
+
# inertias = silhouettes_test
|
| 643 |
+
# best_k = K[np.argmax(silhouettes_test)]
|
| 644 |
+
# elbow_k = best_k
|
| 645 |
|
| 646 |
kmeans_opt = KMeans(n_clusters=elbow_k, random_state=42, n_init=10)
|
| 647 |
labels_opt = kmeans_opt.fit_predict(X)
|
| 648 |
silhouette_opt = silhouette_score(X, labels_opt)
|
| 649 |
|
| 650 |
+
centers = kmeans_opt.cluster_centers_
|
| 651 |
+
radii, densities, volumes = [], [], []
|
| 652 |
+
|
| 653 |
+
for i in range(elbow_k):
|
| 654 |
+
cluster_points = reduced_real[labels_opt == i]
|
| 655 |
+
n_points = len(cluster_points)
|
| 656 |
+
|
| 657 |
+
# Distancias euclídeas al centroide
|
| 658 |
+
dists = np.linalg.norm(cluster_points - centers[i], axis=1)
|
| 659 |
+
|
| 660 |
+
# Radio máximo (engloba todo el cluster)
|
| 661 |
+
r = dists.max()
|
| 662 |
+
radii.append(r)
|
| 663 |
+
|
| 664 |
+
# Volumen de la esfera
|
| 665 |
+
V = (4/3) * np.pi * (r ** 3)
|
| 666 |
+
volumes.append(V)
|
| 667 |
+
|
| 668 |
+
# Densidad = número de puntos / volumen
|
| 669 |
+
density = n_points / V if V > 0 else np.nan
|
| 670 |
+
densities.append(density)
|
| 671 |
+
|
| 672 |
|
| 673 |
dfs_reduced, unique_subsets = split_versions(df_combined, reduced)
|
| 674 |
|
|
|
|
| 749 |
"silhouette_opt": silhouette_opt,
|
| 750 |
"elbow_k": elbow_k,
|
| 751 |
"classes_k": labels_opt,
|
| 752 |
+
"centers_k": centers,
|
| 753 |
+
"radii_k": radii,
|
| 754 |
+
"densities_k": densities,
|
| 755 |
}
|
| 756 |
|
| 757 |
if reduction_method == "PCA":
|
|
|
|
| 913 |
st.write(f"Silhouette Score: {result['silhouette_opt']:.2f}")
|
| 914 |
st.write(f"Optimal number of clusters (k) from Elbow Method: {result['elbow_k']:.2f}")
|
| 915 |
|
| 916 |
+
if "radii_k" in result and "densities_k" in result:
|
| 917 |
+
st.subheader("Cluster Geometry (Radius & Density)")
|
| 918 |
+
|
| 919 |
+
df_clusters = pd.DataFrame({
|
| 920 |
+
"Cluster": np.arange(len(result["radii_k"])),
|
| 921 |
+
"Radius": np.round(result["radii_k"], 4),
|
| 922 |
+
"Density": np.round(result["densities_k"], 6)
|
| 923 |
+
})
|
| 924 |
+
|
| 925 |
+
# Mostrar tabla
|
| 926 |
+
st.dataframe(df_clusters, use_container_width=True)
|
| 927 |
+
# st.table(df_clusters)
|
| 928 |
+
|
| 929 |
# # Mostrar los plots de loadings si se usó PCA (para el conjunto combinado)
|
| 930 |
# if reduction_method == "PCA" and result.get("pca_model") is not None:
|
| 931 |
# # pca_model = result["pca_model"]
|