Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -241,8 +241,8 @@ import numpy as np
|
|
| 241 |
def extract_problem_domains(df,
|
| 242 |
text_column='Processed_ProblemDescription_forDomainExtraction',
|
| 243 |
# text_column='Problem_Description',
|
| 244 |
-
cluster_range=(10,
|
| 245 |
-
top_words=
|
| 246 |
method='sentence_transformers'
|
| 247 |
# method='tfidf_kmeans'
|
| 248 |
):
|
|
@@ -375,10 +375,13 @@ def nlp_pipeline(original_df):
|
|
| 375 |
|
| 376 |
|
| 377 |
# Starting the Pipeline for Domain Extraction
|
| 378 |
-
console_messages.append("
|
| 379 |
# Apply the text_processing_for_domain function to the DataFrame
|
| 380 |
processed_df['Processed_ProblemDescription_forDomainExtraction'] = processed_df['Problem_Description'].apply(text_processing_for_domain)
|
| 381 |
-
|
|
|
|
|
|
|
|
|
|
| 382 |
|
| 383 |
|
| 384 |
# Domain Clustering
|
|
@@ -459,7 +462,7 @@ interface = gr.Interface(
|
|
| 459 |
|
| 460 |
outputs=[
|
| 461 |
gr.File(label="Download the processed Excel File containing the ** Project Proposals ** for each Location~Problem paired combination"), # File download output
|
| 462 |
-
gr.Textbox(label="Console Messages", lines=
|
| 463 |
],
|
| 464 |
|
| 465 |
|
|
|
|
| 241 |
def extract_problem_domains(df,
|
| 242 |
text_column='Processed_ProblemDescription_forDomainExtraction',
|
| 243 |
# text_column='Problem_Description',
|
| 244 |
+
cluster_range=(10, 25),
|
| 245 |
+
top_words=17,
|
| 246 |
method='sentence_transformers'
|
| 247 |
# method='tfidf_kmeans'
|
| 248 |
):
|
|
|
|
| 375 |
|
| 376 |
|
| 377 |
# Starting the Pipeline for Domain Extraction
|
| 378 |
+
console_messages.append("Executing Text processing function for Domain identification")
|
| 379 |
# Apply the text_processing_for_domain function to the DataFrame
|
| 380 |
processed_df['Processed_ProblemDescription_forDomainExtraction'] = processed_df['Problem_Description'].apply(text_processing_for_domain)
|
| 381 |
+
|
| 382 |
+
console_messages.append("Removing entries which could not be allocated to any Problem Domain")
|
| 383 |
+
processed_df = processed_df.dropna(subset=['Processed_ProblemDescription_forDomainExtraction'], axis=0)
|
| 384 |
+
|
| 385 |
|
| 386 |
|
| 387 |
# Domain Clustering
|
|
|
|
| 462 |
|
| 463 |
outputs=[
|
| 464 |
gr.File(label="Download the processed Excel File containing the ** Project Proposals ** for each Location~Problem paired combination"), # File download output
|
| 465 |
+
gr.Textbox(label="Console Messages", lines=25, interactive=False) # Console messages output
|
| 466 |
],
|
| 467 |
|
| 468 |
|