de-Rodrigo committed on
Commit
d419a1f
·
1 Parent(s): c47c05d

Improve Silhouette Computation

Browse files
Files changed (1) hide show
  1. app.py +9 -1
app.py CHANGED
@@ -8,6 +8,7 @@ from bokeh.palettes import Reds9, Blues9, Oranges9, Purples9, Greys9, BuGn9, Gre
8
  from sklearn.decomposition import PCA
9
  from sklearn.manifold import TSNE, trustworthiness
10
  from sklearn.metrics import pairwise_distances, silhouette_score
 
11
  from sklearn.preprocessing import MinMaxScaler
12
  from sklearn.pipeline import Pipeline
13
  from sklearn.base import BaseEstimator, TransformerMixin
@@ -578,11 +579,18 @@ def compute_global_regression(df_combined, embedding_cols, tsne_params, df_f1, r
578
  trust = None
579
  cont = None
580
  silhouette = None
 
581
  if reduction_method in ("t-SNE","PCA"):
582
  X = df_combined[embedding_cols].values
583
  trust = trustworthiness(X, reduced, n_neighbors=TSNE_NEIGHBOURS)
584
  cont = compute_continuity(X, reduced, n_neighbors=TSNE_NEIGHBOURS)
585
- silhouette = silhouette_score(reduced, df_combined['label'])
 
 
 
 
 
 
586
 
587
  dfs_reduced, unique_subsets = split_versions(df_combined, reduced)
588
 
 
8
  from sklearn.decomposition import PCA
9
  from sklearn.manifold import TSNE, trustworthiness
10
  from sklearn.metrics import pairwise_distances, silhouette_score
11
+ from sklearn.cluster import DBSCAN
12
  from sklearn.preprocessing import MinMaxScaler
13
  from sklearn.pipeline import Pipeline
14
  from sklearn.base import BaseEstimator, TransformerMixin
 
579
  trust = None
580
  cont = None
581
  silhouette = None
582
+
583
  if reduction_method in ("t-SNE","PCA"):
584
  X = df_combined[embedding_cols].values
585
  trust = trustworthiness(X, reduced, n_neighbors=TSNE_NEIGHBOURS)
586
  cont = compute_continuity(X, reduced, n_neighbors=TSNE_NEIGHBOURS)
587
+
588
+ silhouette_clustering = DBSCAN(eps=0.5, min_samples=7).fit(reduced)
589
+ silhouette_labels = silhouette_clustering.labels_
590
+
591
+ if len(set(silhouette_labels)) > 1:
592
+ silhouette = silhouette_score(reduced, silhouette_labels)
593
+
594
 
595
  dfs_reduced, unique_subsets = split_versions(df_combined, reduced)
596