Spaces:
Sleeping
Sleeping
Commit
·
f0feb91
1
Parent(s):
aa6ee2f
K-Means
Browse files
app.py
CHANGED
|
@@ -4,11 +4,11 @@ import numpy as np
|
|
| 4 |
from bokeh.plotting import figure
|
| 5 |
from bokeh.models import ColumnDataSource, DataTable, TableColumn, CustomJS, Select, Button, HoverTool, LinearColorMapper, ColorBar, FuncTickFormatter, FixedTicker
|
| 6 |
from bokeh.layouts import column
|
| 7 |
-
from bokeh.palettes import Reds9, Blues9, Oranges9, Purples9, Greys9, BuGn9, Greens9, RdYlGn11, linear_palette
|
| 8 |
from sklearn.decomposition import PCA
|
| 9 |
from sklearn.manifold import TSNE, trustworthiness
|
| 10 |
from sklearn.metrics import pairwise_distances, silhouette_score
|
| 11 |
-
from sklearn.cluster import DBSCAN
|
| 12 |
from sklearn.preprocessing import MinMaxScaler
|
| 13 |
from sklearn.pipeline import Pipeline
|
| 14 |
from sklearn.base import BaseEstimator, TransformerMixin
|
|
@@ -614,6 +614,15 @@ def compute_global_regression(df_combined, embedding_cols, tsne_params, df_f1, r
|
|
| 614 |
silhouette_vals.append(silhouette_score(reduced_real, df_combined[real_mask]['label']))
|
| 615 |
|
| 616 |
silhouette = np.max(silhouette_vals)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 617 |
|
| 618 |
dfs_reduced, unique_subsets = split_versions(df_combined, reduced)
|
| 619 |
|
|
@@ -689,7 +698,8 @@ def compute_global_regression(df_combined, embedding_cols, tsne_params, df_f1, r
|
|
| 689 |
"explained_variance": explained_variance,
|
| 690 |
"trustworthiness": trust,
|
| 691 |
"continuity": cont,
|
| 692 |
-
"silhouette": silhouette
|
|
|
|
| 693 |
}
|
| 694 |
|
| 695 |
if reduction_method == "PCA":
|
|
@@ -828,6 +838,26 @@ def run_model(model_name):
|
|
| 828 |
st.write(f"Continuity: {result['continuity']:.2f}")
|
| 829 |
st.write(f"Silhouette Score: {result['silhouette']:.2f}")
|
| 830 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 831 |
# # Mostrar los plots de loadings si se usó PCA (para el conjunto combinado)
|
| 832 |
# if reduction_method == "PCA" and result.get("pca_model") is not None:
|
| 833 |
# # pca_model = result["pca_model"]
|
|
|
|
| 4 |
from bokeh.plotting import figure
|
| 5 |
from bokeh.models import ColumnDataSource, DataTable, TableColumn, CustomJS, Select, Button, HoverTool, LinearColorMapper, ColorBar, FuncTickFormatter, FixedTicker
|
| 6 |
from bokeh.layouts import column
|
| 7 |
+
from bokeh.palettes import Reds9, Blues9, Oranges9, Purples9, Greys9, BuGn9, Greens9, RdYlGn11, linear_palette, Category10
|
| 8 |
from sklearn.decomposition import PCA
|
| 9 |
from sklearn.manifold import TSNE, trustworthiness
|
| 10 |
from sklearn.metrics import pairwise_distances, silhouette_score
|
| 11 |
+
from sklearn.cluster import DBSCAN, KMeans
|
| 12 |
from sklearn.preprocessing import MinMaxScaler
|
| 13 |
from sklearn.pipeline import Pipeline
|
| 14 |
from sklearn.base import BaseEstimator, TransformerMixin
|
|
|
|
| 614 |
silhouette_vals.append(silhouette_score(reduced_real, df_combined[real_mask]['label']))
|
| 615 |
|
| 616 |
silhouette = np.max(silhouette_vals)
|
| 617 |
+
|
| 618 |
+
inertias = []
|
| 619 |
+
K = range(1, 20)
|
| 620 |
+
|
| 621 |
+
for k in K:
|
| 622 |
+
kmeans = KMeans(n_clusters=k, random_state=42)
|
| 623 |
+
kmeans.fit(reduced_real)
|
| 624 |
+
inertias.append(kmeans.inertia_)
|
| 625 |
+
|
| 626 |
|
| 627 |
dfs_reduced, unique_subsets = split_versions(df_combined, reduced)
|
| 628 |
|
|
|
|
| 698 |
"explained_variance": explained_variance,
|
| 699 |
"trustworthiness": trust,
|
| 700 |
"continuity": cont,
|
| 701 |
+
"silhouette": silhouette,
|
| 702 |
+
"inertias": inertias,
|
| 703 |
}
|
| 704 |
|
| 705 |
if reduction_method == "PCA":
|
|
|
|
| 838 |
st.write(f"Continuity: {result['continuity']:.2f}")
|
| 839 |
st.write(f"Silhouette Score: {result['silhouette']:.2f}")
|
| 840 |
|
| 841 |
+
st.subheader("Elbow Method (KMeans)")
|
| 842 |
+
if "inertias" in result and len(result["inertias"]) > 0:
|
| 843 |
+
K = np.arange(1, len(result["inertias"]) + 1)
|
| 844 |
+
inertias = np.array(result["inertias"])
|
| 845 |
+
|
| 846 |
+
source = ColumnDataSource(data=dict(K=K, inertias=inertias))
|
| 847 |
+
p = figure(title="Método del Codo",
|
| 848 |
+
x_axis_label='Número de Clusters (k)',
|
| 849 |
+
y_axis_label='Inercia (Within-Cluster Sum of Squares)',
|
| 850 |
+
width=600, height=400,
|
| 851 |
+
background_fill_color="#fafafa")
|
| 852 |
+
|
| 853 |
+
p.line('K', 'inertias', source=source, line_width=3, color=Category10[10][0])
|
| 854 |
+
p.circle('K', 'inertias', source=source, size=8, color=Category10[10][1], legend_label="Inertia")
|
| 855 |
+
|
| 856 |
+
p.legend.location = "top_right"
|
| 857 |
+
p.grid.visible = True
|
| 858 |
+
|
| 859 |
+
st.bokeh_chart(p, use_container_width=True)
|
| 860 |
+
|
| 861 |
# # Mostrar los plots de loadings si se usó PCA (para el conjunto combinado)
|
| 862 |
# if reduction_method == "PCA" and result.get("pca_model") is not None:
|
| 863 |
# # pca_model = result["pca_model"]
|