de-Rodrigo committed on
Commit
86538a4
·
1 Parent(s): 7536013

Try Silhouette Based on Features

Browse files
Files changed (1) hide show
  1. app.py +21 -12
app.py CHANGED
@@ -580,21 +580,30 @@ def compute_global_regression(df_combined, embedding_cols, tsne_params, df_f1, r
580
  cont = None
581
  silhouette = None
582
 
583
- if reduction_method in ("t-SNE","PCA"):
584
- X = df_combined[embedding_cols].values
585
- trust = trustworthiness(X, reduced, n_neighbors=TSNE_NEIGHBOURS)
586
- cont = compute_continuity(X, reduced, n_neighbors=TSNE_NEIGHBOURS)
587
 
588
- silhouette_clustering = DBSCAN(eps=0.1, min_samples=15).fit(reduced)
589
- silhouette_labels = silhouette_clustering.labels_
590
- print("Silhouette labels:", silhouette_labels)
591
-
592
- if len(set(silhouette_labels)) > 1:
593
- silhouette = silhouette_score(reduced, silhouette_labels)
594
- else:
595
- silhouette = -1
596
 
 
 
 
 
 
 
 
597
 
 
 
 
 
 
 
 
598
  dfs_reduced, unique_subsets = split_versions(df_combined, reduced)
599
 
600
  df_distances = compute_cluster_distances_synthetic_individual(
 
580
  cont = None
581
  silhouette = None
582
 
583
+ # if reduction_method in ("t-SNE","PCA"):
584
+ # X = df_combined[embedding_cols].values
585
+ # trust = trustworthiness(X, reduced, n_neighbors=TSNE_NEIGHBOURS)
586
+ # cont = compute_continuity(X, reduced, n_neighbors=TSNE_NEIGHBOURS)
587
 
588
+ # silhouette_clustering = DBSCAN(eps=0.1, min_samples=15).fit(reduced)
589
+ # silhouette_labels = silhouette_clustering.labels_
590
+ # print("Silhouette labels:", silhouette_labels)
 
 
 
 
 
591
 
592
+ # if len(set(silhouette_labels)) > 1:
593
+ # silhouette = silhouette_score(reduced, silhouette_labels)
594
+ # else:
595
+ # silhouette = -1
596
+
597
+ df_heat = pd.read_csv(f"data/heatmaps_donut.csv")
598
+ feature_options = [col for col in df_heat.columns if col != "name"]
599
 
600
+ silhouette_vals = []
601
+ for feature in feature_options:
602
+ silhouette = silhouette_score(reduced, feature)
603
+ silhouette_vals.append(silhouette)
604
+
605
+ silhouette = np.mean(silhouette_vals)
606
+
607
  dfs_reduced, unique_subsets = split_versions(df_combined, reduced)
608
 
609
  df_distances = compute_cluster_distances_synthetic_individual(