Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -346,7 +346,6 @@ def extract_problem_domains(df,
|
|
| 346 |
|
| 347 |
# console_messages.append(f"top_words: {top_words}, type: {type(top_words)}")
|
| 348 |
# console_messages.append(f"center.argsort(): {center.argsort()}, type: {type(center.argsort())}")
|
| 349 |
-
|
| 350 |
|
| 351 |
# # top_word_indices = center.argsort()[-top_words:][::-1]
|
| 352 |
# top_word_indices = center.argsort()[-top_words:][::-1].tolist() # Indexes of top words
|
|
@@ -354,24 +353,33 @@ def extract_problem_domains(df,
|
|
| 354 |
# top_words = [feature_names[index] for index in top_word_indices]
|
| 355 |
# cluster_representations[i] = top_words
|
| 356 |
|
| 357 |
-
|
| 358 |
try:
|
| 359 |
center = kmeans.cluster_centers_[i]
|
| 360 |
# console_messages.append(f"Processing cluster {i}")
|
| 361 |
# console_messages.append(f"Center shape: {center.shape}, type: {type(center)}")
|
| 362 |
|
| 363 |
-
|
| 364 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 365 |
|
| 366 |
-
top_words = [feature_names[index] for index in top_word_indices]
|
| 367 |
-
# console_messages.append(f"Top words: {top_words}")
|
| 368 |
|
| 369 |
-
cluster_representations[i] = top_words
|
| 370 |
except Exception as e:
|
| 371 |
console_messages.append(f"Error processing cluster {i}: {str(e)}")
|
| 372 |
console_messages.append(f"Center: {center}")
|
| 373 |
|
| 374 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 375 |
console_messages.append(f"Number of clusters: {optimal_n_clusters}")
|
| 376 |
console_messages.append(f"Sample cluster words: {cluster_representations[0][:5]}...")
|
| 377 |
|
|
@@ -390,12 +398,7 @@ def extract_problem_domains(df,
|
|
| 390 |
|
| 391 |
|
| 392 |
|
| 393 |
-
|
| 394 |
-
|
| 395 |
-
|
| 396 |
-
|
| 397 |
-
|
| 398 |
-
|
| 399 |
|
| 400 |
|
| 401 |
# def nlp_pipeline(original_df):
|
|
|
|
| 346 |
|
| 347 |
# console_messages.append(f"top_words: {top_words}, type: {type(top_words)}")
|
| 348 |
# console_messages.append(f"center.argsort(): {center.argsort()}, type: {type(center.argsort())}")
|
|
|
|
| 349 |
|
| 350 |
# # top_word_indices = center.argsort()[-top_words:][::-1]
|
| 351 |
# top_word_indices = center.argsort()[-top_words:][::-1].tolist() # Indexes of top words
|
|
|
|
| 353 |
# top_words = [feature_names[index] for index in top_word_indices]
|
| 354 |
# cluster_representations[i] = top_words
|
| 355 |
|
|
|
|
| 356 |
try:
|
| 357 |
center = kmeans.cluster_centers_[i]
|
| 358 |
# console_messages.append(f"Processing cluster {i}")
|
| 359 |
# console_messages.append(f"Center shape: {center.shape}, type: {type(center)}")
|
| 360 |
|
| 361 |
+
if isinstance(center, np.ndarray) and center.ndim == 1:
|
| 362 |
+
top_word_indices = center.argsort()[-top_words:][::-1]
|
| 363 |
+
# top_word_indices = center.argsort()[-top_words:][::-1].tolist()
|
| 364 |
+
|
| 365 |
+
console_messages.append(f"Top word indices for cluster {i}: {top_word_indices}")
|
| 366 |
+
top_words = [feature_names[index] for index in top_word_indices]
|
| 367 |
+
console_messages.append(f"Top words: {top_words}")
|
| 368 |
+
cluster_representations[i] = top_words
|
| 369 |
+
else:
|
| 370 |
+
console_messages.append(f"Error: Cluster center is not a 1D array for cluster {i}")
|
| 371 |
|
|
|
|
|
|
|
| 372 |
|
|
|
|
| 373 |
except Exception as e:
|
| 374 |
console_messages.append(f"Error processing cluster {i}: {str(e)}")
|
| 375 |
console_messages.append(f"Center: {center}")
|
| 376 |
|
| 377 |
|
| 378 |
+
try:
|
| 379 |
+
center = kmeans.cluster_centers_[i]
|
| 380 |
+
print(f"Center for cluster {i}: {center}")
|
| 381 |
+
|
| 382 |
+
|
| 383 |
console_messages.append(f"Number of clusters: {optimal_n_clusters}")
|
| 384 |
console_messages.append(f"Sample cluster words: {cluster_representations[0][:5]}...")
|
| 385 |
|
|
|
|
| 398 |
|
| 399 |
|
| 400 |
|
| 401 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 402 |
|
| 403 |
|
| 404 |
# def nlp_pipeline(original_df):
|