Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,7 +1,8 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
import pandas as pd
|
| 3 |
|
| 4 |
-
def data_pre_processing(file_responses):
|
|
|
|
| 5 |
# Financial Weights can be anything (ultimately the row-wise weights are aggregated and the corresponding fractions are obtained from that row's total tax paid)
|
| 6 |
|
| 7 |
try: # Define the columns to be processed
|
|
@@ -62,11 +63,14 @@ def data_pre_processing(file_responses):
|
|
| 62 |
|
| 63 |
|
| 64 |
# A different return value can be used to check the processing
|
|
|
|
| 65 |
# return file_responses
|
| 66 |
-
return merged_dataset
|
| 67 |
|
| 68 |
except Exception as e:
|
| 69 |
-
|
|
|
|
|
|
|
| 70 |
|
| 71 |
|
| 72 |
|
|
@@ -225,8 +229,8 @@ def extract_problem_domains(df,
|
|
| 225 |
text_column='Problem_Description',
|
| 226 |
cluster_range=(10, 50),
|
| 227 |
top_words=17,
|
| 228 |
-
|
| 229 |
-
method='tfidf_kmeans'
|
| 230 |
):
|
| 231 |
|
| 232 |
|
|
@@ -317,11 +321,12 @@ def extract_problem_domains(df,
|
|
| 317 |
|
| 318 |
|
| 319 |
|
| 320 |
-
|
| 321 |
-
|
| 322 |
-
|
|
|
|
| 323 |
# Data Preprocessing
|
| 324 |
-
processed_df = data_pre_processing(original_df) # merged_dataset
|
| 325 |
|
| 326 |
|
| 327 |
# Starting the Pipeline for Domain Extraction
|
|
@@ -344,10 +349,9 @@ def nlp_pipeline(original_df):
|
|
| 344 |
# location_clusters, location_model = perform_clustering(processed_df['Geographical_Location'], n_clusters=5)
|
| 345 |
|
| 346 |
|
| 347 |
-
|
| 348 |
-
|
| 349 |
# return processed_df
|
| 350 |
-
return domain_df,
|
| 351 |
|
| 352 |
|
| 353 |
def process_excel(file):
|
|
@@ -362,7 +366,7 @@ def process_excel(file):
|
|
| 362 |
|
| 363 |
# Process the DataFrame
|
| 364 |
console_messages.append("Processing the DataFrame...")
|
| 365 |
-
result_df, console_messages = nlp_pipeline(df)
|
| 366 |
|
| 367 |
# output_file = "Output_ProjectProposals.xlsx"
|
| 368 |
output_file = "Output_Proposals.xlsx"
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
import pandas as pd
|
| 3 |
|
| 4 |
+
def data_pre_processing(file_responses, console_messages):
|
| 5 |
+
console_messages.append("Starting data pre-processing...")
|
| 6 |
# Financial Weights can be anything (ultimately the row-wise weights are aggregated and the corresponding fractions are obtained from that row's total tax paid)
|
| 7 |
|
| 8 |
try: # Define the columns to be processed
|
|
|
|
| 63 |
|
| 64 |
|
| 65 |
# A different return value can be used to check the processing
|
| 66 |
+
console_messages.append("Data pre-processing completed.")
|
| 67 |
# return file_responses
|
| 68 |
+
return merged_dataset, console_messages
|
| 69 |
|
| 70 |
except Exception as e:
|
| 71 |
+
console_messages.append(f"Error during data pre-processing: {str(e)}")
|
| 72 |
+
# return str(e), console_messages
|
| 73 |
+
return None, console_messages
|
| 74 |
|
| 75 |
|
| 76 |
|
|
|
|
| 229 |
text_column='Problem_Description',
|
| 230 |
cluster_range=(10, 50),
|
| 231 |
top_words=17,
|
| 232 |
+
method='sentence_transformers'
|
| 233 |
+
# method='tfidf_kmeans'
|
| 234 |
):
|
| 235 |
|
| 236 |
|
|
|
|
| 321 |
|
| 322 |
|
| 323 |
|
| 324 |
+
# def nlp_pipeline(original_df):
|
| 325 |
+
def nlp_pipeline(original_df, console_messages):
|
| 326 |
+
console_messages.append("Starting NLP pipeline...")
|
| 327 |
+
|
| 328 |
# Data Preprocessing
|
| 329 |
+
processed_df, console_messages = data_pre_processing(original_df, console_messages) # merged_dataset
|
| 330 |
|
| 331 |
|
| 332 |
# Starting the Pipeline for Domain Extraction
|
|
|
|
| 349 |
# location_clusters, location_model = perform_clustering(processed_df['Geographical_Location'], n_clusters=5)
|
| 350 |
|
| 351 |
|
| 352 |
+
console_messages.append("NLP pipeline completed.")
|
|
|
|
| 353 |
# return processed_df
|
| 354 |
+
return domain_df, console_messages
|
| 355 |
|
| 356 |
|
| 357 |
def process_excel(file):
|
|
|
|
| 366 |
|
| 367 |
# Process the DataFrame
|
| 368 |
console_messages.append("Processing the DataFrame...")
|
| 369 |
+
result_df, console_messages = nlp_pipeline(df, console_messages)
|
| 370 |
|
| 371 |
# output_file = "Output_ProjectProposals.xlsx"
|
| 372 |
output_file = "Output_Proposals.xlsx"
|