Milad Alshomary committed on
Commit
95d09b1
·
1 Parent(s): 0e62aa1
Files changed (1) hide show
  1. utils/clustering_utils.py +4 -0
utils/clustering_utils.py CHANGED
@@ -45,12 +45,16 @@ def _calculate_silhouette_score(X: np.ndarray, labels: np.ndarray, metric: str)
45
  unique_labels_set = set(labels)
46
  n_clusters_ = len(unique_labels_set) - (1 if -1 in unique_labels_set else 0)
47
 
 
 
48
  if n_clusters_ > 1:
 
49
  clustered_mask = (labels != -1)
50
  if np.sum(clustered_mask) > 1:
51
  X_clustered = X[clustered_mask]
52
  labels_clustered = labels[clustered_mask]
53
  try:
 
54
  return silhouette_score(X_clustered, labels_clustered, metric=metric)
55
  except ValueError:
56
  return None
 
45
  unique_labels_set = set(labels)
46
  n_clusters_ = len(unique_labels_set) - (1 if -1 in unique_labels_set else 0)
47
 
48
+ # The silhouette score is only defined if there is more than 1 cluster.
49
+ # Outliers (label -1) are excluded from the score calculation.
50
  if n_clusters_ > 1:
51
+ # Create a mask to select only points that are part of a cluster (not noise)
52
  clustered_mask = (labels != -1)
53
  if np.sum(clustered_mask) > 1:
54
  X_clustered = X[clustered_mask]
55
  labels_clustered = labels[clustered_mask]
56
  try:
57
+ # Compute the score on the non-outlier points
58
  return silhouette_score(X_clustered, labels_clustered, metric=metric)
59
  except ValueError:
60
  return None