Spaces:
Sleeping
Sleeping
Commit
·
d419a1f
1
Parent(s):
c47c05d
Improve Silhouette Computation
Browse files
app.py
CHANGED
|
@@ -8,6 +8,7 @@ from bokeh.palettes import Reds9, Blues9, Oranges9, Purples9, Greys9, BuGn9, Gre
|
|
| 8 |
from sklearn.decomposition import PCA
|
| 9 |
from sklearn.manifold import TSNE, trustworthiness
|
| 10 |
from sklearn.metrics import pairwise_distances, silhouette_score
|
|
|
|
| 11 |
from sklearn.preprocessing import MinMaxScaler
|
| 12 |
from sklearn.pipeline import Pipeline
|
| 13 |
from sklearn.base import BaseEstimator, TransformerMixin
|
|
@@ -578,11 +579,18 @@ def compute_global_regression(df_combined, embedding_cols, tsne_params, df_f1, r
|
|
| 578 |
trust = None
|
| 579 |
cont = None
|
| 580 |
silhouette = None
|
|
|
|
| 581 |
if reduction_method in ("t-SNE","PCA"):
|
| 582 |
X = df_combined[embedding_cols].values
|
| 583 |
trust = trustworthiness(X, reduced, n_neighbors=TSNE_NEIGHBOURS)
|
| 584 |
cont = compute_continuity(X, reduced, n_neighbors=TSNE_NEIGHBOURS)
|
| 585 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 586 |
|
| 587 |
dfs_reduced, unique_subsets = split_versions(df_combined, reduced)
|
| 588 |
|
|
|
|
| 8 |
from sklearn.decomposition import PCA
|
| 9 |
from sklearn.manifold import TSNE, trustworthiness
|
| 10 |
from sklearn.metrics import pairwise_distances, silhouette_score
|
| 11 |
+
from sklearn.cluster import DBSCAN
|
| 12 |
from sklearn.preprocessing import MinMaxScaler
|
| 13 |
from sklearn.pipeline import Pipeline
|
| 14 |
from sklearn.base import BaseEstimator, TransformerMixin
|
|
|
|
| 579 |
trust = None
|
| 580 |
cont = None
|
| 581 |
silhouette = None
|
| 582 |
+
|
| 583 |
if reduction_method in ("t-SNE","PCA"):
|
| 584 |
X = df_combined[embedding_cols].values
|
| 585 |
trust = trustworthiness(X, reduced, n_neighbors=TSNE_NEIGHBOURS)
|
| 586 |
cont = compute_continuity(X, reduced, n_neighbors=TSNE_NEIGHBOURS)
|
| 587 |
+
|
| 588 |
+
silhouette_clustering = DBSCAN(eps=0.5, min_samples=7).fit(reduced)
|
| 589 |
+
silhouette_labels = silhouette_clustering.labels_
|
| 590 |
+
|
| 591 |
+
if len(set(silhouette_labels)) > 1:
|
| 592 |
+
silhouette = silhouette_score(reduced, silhouette_labels)
|
| 593 |
+
|
| 594 |
|
| 595 |
dfs_reduced, unique_subsets = split_versions(df_combined, reduced)
|
| 596 |
|