Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -274,7 +274,7 @@ def extract_problem_domains(df,
|
|
| 274 |
optimal_n_clusters = cluster_range[0] + silhouette_scores.index(max(silhouette_scores))
|
| 275 |
|
| 276 |
# Perform final clustering with optimal number of clusters
|
| 277 |
-
kmeans = KMeans(n_clusters=optimal_n_clusters, random_state=42)
|
| 278 |
cluster_labels = kmeans.fit_predict(X)
|
| 279 |
|
| 280 |
# # BERTopic approach (commented out)
|
|
@@ -312,14 +312,14 @@ def extract_problem_domains(df,
|
|
| 312 |
|
| 313 |
try:
|
| 314 |
center = kmeans.cluster_centers_[i]
|
| 315 |
-
console_messages.append(f"Processing cluster {i}")
|
| 316 |
-
console_messages.append(f"Center shape: {center.shape}, type: {type(center)}")
|
| 317 |
|
| 318 |
top_word_indices = center.argsort()[-top_words:][::-1].tolist()
|
| 319 |
-
console_messages.append(f"Top word indices: {top_word_indices}")
|
| 320 |
|
| 321 |
top_words = [feature_names[index] for index in top_word_indices]
|
| 322 |
-
console_messages.append(f"Top words: {top_words}")
|
| 323 |
|
| 324 |
cluster_representations[i] = top_words
|
| 325 |
except Exception as e:
|
|
@@ -327,15 +327,15 @@ def extract_problem_domains(df,
|
|
| 327 |
console_messages.append(f"Center: {center}")
|
| 328 |
|
| 329 |
|
| 330 |
-
|
| 331 |
-
|
| 332 |
-
|
| 333 |
|
| 334 |
# Map cluster labels to representative words
|
| 335 |
df["Problem_Cluster"] = cluster_labels
|
| 336 |
df['Problem_Category_Words'] = [cluster_representations[label] for label in cluster_labels]
|
| 337 |
|
| 338 |
-
console_messages.append("Returning from Problem Domain Extraction function.")
|
|
|
|
| 339 |
return df, optimal_n_clusters
|
| 340 |
|
| 341 |
|
|
|
|
| 274 |
optimal_n_clusters = cluster_range[0] + silhouette_scores.index(max(silhouette_scores))
|
| 275 |
|
| 276 |
# Perform final clustering with optimal number of clusters
|
| 277 |
+
kmeans = KMeans(n_clusters=optimal_n_clusters) #, random_state=42)
|
| 278 |
cluster_labels = kmeans.fit_predict(X)
|
| 279 |
|
| 280 |
# # BERTopic approach (commented out)
|
|
|
|
| 312 |
|
| 313 |
try:
|
| 314 |
center = kmeans.cluster_centers_[i]
|
| 315 |
+
# console_messages.append(f"Processing cluster {i}")
|
| 316 |
+
# console_messages.append(f"Center shape: {center.shape}, type: {type(center)}")
|
| 317 |
|
| 318 |
top_word_indices = center.argsort()[-top_words:][::-1].tolist()
|
| 319 |
+
# console_messages.append(f"Top word indices: {top_word_indices}")
|
| 320 |
|
| 321 |
top_words = [feature_names[index] for index in top_word_indices]
|
| 322 |
+
# console_messages.append(f"Top words: {top_words}")
|
| 323 |
|
| 324 |
cluster_representations[i] = top_words
|
| 325 |
except Exception as e:
|
|
|
|
| 327 |
console_messages.append(f"Center: {center}")
|
| 328 |
|
| 329 |
|
| 330 |
+
console_messages.append(f"Number of clusters: {optimal_n_clusters}")
|
| 331 |
+
console_messages.append(f"Sample cluster words: {cluster_representations[0][:5]}...")
|
|
|
|
| 332 |
|
| 333 |
# Map cluster labels to representative words
|
| 334 |
df["Problem_Cluster"] = cluster_labels
|
| 335 |
df['Problem_Category_Words'] = [cluster_representations[label] for label in cluster_labels]
|
| 336 |
|
| 337 |
+
# console_messages.append("Returning from Problem Domain Extraction function.")
|
| 338 |
+
console_messages.append("Problem Domain Extraction completed.")
|
| 339 |
return df, optimal_n_clusters
|
| 340 |
|
| 341 |
|