Milad Alshomary
committed on
Commit
·
95d09b1
1
Parent(s):
0e62aa1
updates
Browse files
utils/clustering_utils.py
CHANGED
|
@@ -45,12 +45,16 @@ def _calculate_silhouette_score(X: np.ndarray, labels: np.ndarray, metric: str)
|
|
| 45 |
unique_labels_set = set(labels)
|
| 46 |
n_clusters_ = len(unique_labels_set) - (1 if -1 in unique_labels_set else 0)
|
| 47 |
|
|
|
|
|
|
|
| 48 |
if n_clusters_ > 1:
|
|
|
|
| 49 |
clustered_mask = (labels != -1)
|
| 50 |
if np.sum(clustered_mask) > 1:
|
| 51 |
X_clustered = X[clustered_mask]
|
| 52 |
labels_clustered = labels[clustered_mask]
|
| 53 |
try:
|
|
|
|
| 54 |
return silhouette_score(X_clustered, labels_clustered, metric=metric)
|
| 55 |
except ValueError:
|
| 56 |
return None
|
|
|
|
| 45 |
unique_labels_set = set(labels)
|
| 46 |
n_clusters_ = len(unique_labels_set) - (1 if -1 in unique_labels_set else 0)
|
| 47 |
|
| 48 |
+
# The silhouette score is only defined if there is more than 1 cluster.
|
| 49 |
+
# Outliers (label -1) are excluded from the score calculation.
|
| 50 |
if n_clusters_ > 1:
|
| 51 |
+
# Create a mask to select only points that are part of a cluster (not noise)
|
| 52 |
clustered_mask = (labels != -1)
|
| 53 |
if np.sum(clustered_mask) > 1:
|
| 54 |
X_clustered = X[clustered_mask]
|
| 55 |
labels_clustered = labels[clustered_mask]
|
| 56 |
try:
|
| 57 |
+
# Compute the score on the non-outlier points
|
| 58 |
return silhouette_score(X_clustered, labels_clustered, metric=metric)
|
| 59 |
except ValueError:
|
| 60 |
return None
|