Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -229,12 +229,13 @@ def extract_problem_domains(df,
|
|
| 229 |
text_column='Problem_Description',
|
| 230 |
cluster_range=(10, 50),
|
| 231 |
top_words=17,
|
| 232 |
-
# method='sentence_transformers'
|
| 233 |
-
method='tfidf_kmeans'
|
|
|
|
| 234 |
):
|
| 235 |
|
| 236 |
|
| 237 |
-
|
| 238 |
|
| 239 |
if method == 'sentence_transformers':
|
| 240 |
# Sentence Transformers approach
|
|
@@ -294,6 +295,12 @@ def extract_problem_domains(df,
|
|
| 294 |
for i in range(optimal_n_clusters):
|
| 295 |
center = kmeans.cluster_centers_[i]
|
| 296 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 297 |
|
| 298 |
|
| 299 |
# top_word_indices = center.argsort()[-top_words:][::-1]
|
|
@@ -306,6 +313,7 @@ def extract_problem_domains(df,
|
|
| 306 |
df["Problem_Cluster"] = cluster_labels
|
| 307 |
df['Problem_Category_Words'] = [cluster_representations[label] for label in cluster_labels]
|
| 308 |
|
|
|
|
| 309 |
return df, optimal_n_clusters
|
| 310 |
|
| 311 |
|
|
@@ -338,7 +346,7 @@ def nlp_pipeline(original_df, console_messages):
|
|
| 338 |
|
| 339 |
# Domain Clustering
|
| 340 |
try:
|
| 341 |
-
domain_df, optimal_n_clusters = extract_problem_domains(processed_df)
|
| 342 |
# print(f"Optimal clusters: {optimal_clusters}")
|
| 343 |
# print(result_df.head())
|
| 344 |
# console_messages.append(f"Optimal clusters: {optimal_n_clusters}")
|
|
|
|
| 229 |
text_column='Problem_Description',
|
| 230 |
cluster_range=(10, 50),
|
| 231 |
top_words=17,
|
| 232 |
+
# method='sentence_transformers',
|
| 233 |
+
method='tfidf_kmeans',
|
| 234 |
+
console_messages
|
| 235 |
):
|
| 236 |
|
| 237 |
|
| 238 |
+
console_messages.append("Extracting Problem Domains...")
|
| 239 |
|
| 240 |
if method == 'sentence_transformers':
|
| 241 |
# Sentence Transformers approach
|
|
|
|
| 295 |
for i in range(optimal_n_clusters):
|
| 296 |
center = kmeans.cluster_centers_[i]
|
| 297 |
|
| 298 |
+
# print(f"top_words: {top_words}, type: {type(top_words)}")
|
| 299 |
+
# print(f"center.argsort(): {center.argsort()}, type: {type(center.argsort())}")
|
| 300 |
+
|
| 301 |
+
console_messages.append(f"top_words: {top_words}, type: {type(top_words)}",
|
| 302 |
+
f"center.argsort(): {center.argsort()}, type: {type(center.argsort())}"
|
| 303 |
+
)
|
| 304 |
|
| 305 |
|
| 306 |
# top_word_indices = center.argsort()[-top_words:][::-1]
|
|
|
|
| 313 |
df["Problem_Cluster"] = cluster_labels
|
| 314 |
df['Problem_Category_Words'] = [cluster_representations[label] for label in cluster_labels]
|
| 315 |
|
| 316 |
+
console_messages.append("Returning from Problem Domain Extraction function.")
|
| 317 |
return df, optimal_n_clusters
|
| 318 |
|
| 319 |
|
|
|
|
| 346 |
|
| 347 |
# Domain Clustering
|
| 348 |
try:
|
| 349 |
+
domain_df, optimal_n_clusters = extract_problem_domains(processed_df, console_messages)
|
| 350 |
# print(f"Optimal clusters: {optimal_clusters}")
|
| 351 |
# print(result_df.head())
|
| 352 |
# console_messages.append(f"Optimal clusters: {optimal_n_clusters}")
|