Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -294,20 +294,42 @@ def extract_problem_domains(df,
|
|
| 294 |
feature_names = vectorizer.get_feature_names_out()
|
| 295 |
cluster_representations = {}
|
| 296 |
for i in range(optimal_n_clusters):
|
| 297 |
-
center = kmeans.cluster_centers_[i]
|
| 298 |
|
| 299 |
-
# print(f"top_words: {top_words}, type: {type(top_words)}")
|
| 300 |
-
# print(f"center.argsort(): {center.argsort()}, type: {type(center.argsort())}")
|
| 301 |
|
| 302 |
-
console_messages.append(f"top_words: {top_words}, type: {type(top_words)}")
|
| 303 |
-
console_messages.append(f"center.argsort(): {center.argsort()}, type: {type(center.argsort())}")
|
| 304 |
|
| 305 |
|
| 306 |
-
# top_word_indices = center.argsort()[-top_words:][::-1]
|
| 307 |
-
top_word_indices = center.argsort()[-top_words:][::-1].tolist() # Indexes of top words
|
| 308 |
|
| 309 |
-
top_words = [feature_names[index] for index in top_word_indices]
|
| 310 |
-
cluster_representations[i] = top_words
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 311 |
|
| 312 |
# Map cluster labels to representative words
|
| 313 |
df["Problem_Cluster"] = cluster_labels
|
|
|
|
| 294 |
feature_names = vectorizer.get_feature_names_out()
|
| 295 |
cluster_representations = {}
|
| 296 |
for i in range(optimal_n_clusters):
|
| 297 |
+
# center = kmeans.cluster_centers_[i]
|
| 298 |
|
| 299 |
+
# # print(f"top_words: {top_words}, type: {type(top_words)}")
|
| 300 |
+
# # print(f"center.argsort(): {center.argsort()}, type: {type(center.argsort())}")
|
| 301 |
|
| 302 |
+
# console_messages.append(f"top_words: {top_words}, type: {type(top_words)}")
|
| 303 |
+
# console_messages.append(f"center.argsort(): {center.argsort()}, type: {type(center.argsort())}")
|
| 304 |
|
| 305 |
|
| 306 |
+
# # top_word_indices = center.argsort()[-top_words:][::-1]
|
| 307 |
+
# top_word_indices = center.argsort()[-top_words:][::-1].tolist() # Indexes of top words
|
| 308 |
|
| 309 |
+
# top_words = [feature_names[index] for index in top_word_indices]
|
| 310 |
+
# cluster_representations[i] = top_words
|
| 311 |
+
|
| 312 |
+
|
| 313 |
+
try:
|
| 314 |
+
center = kmeans.cluster_centers_[i]
|
| 315 |
+
console_messages.append(f"Processing cluster {i}")
|
| 316 |
+
console_messages.append(f"Center shape: {center.shape}, type: {type(center)}")
|
| 317 |
+
|
| 318 |
+
top_word_indices = center.argsort()[-top_words:][::-1].tolist()
|
| 319 |
+
console_messages.append(f"Top word indices: {top_word_indices}")
|
| 320 |
+
|
| 321 |
+
top_words = [feature_names[index] for index in top_word_indices]
|
| 322 |
+
console_messages.append(f"Top words: {top_words}")
|
| 323 |
+
|
| 324 |
+
cluster_representations[i] = top_words
|
| 325 |
+
except Exception as e:
|
| 326 |
+
console_messages.append(f"Error processing cluster {i}: {str(e)}")
|
| 327 |
+
console_messages.append(f"Center: {center}")
|
| 328 |
+
|
| 329 |
+
|
| 330 |
+
|
| 331 |
+
|
| 332 |
+
|
| 333 |
|
| 334 |
# Map cluster labels to representative words
|
| 335 |
df["Problem_Cluster"] = cluster_labels
|