Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -327,9 +327,18 @@ def nlp_pipeline(original_df):
|
|
| 327 |
# Starting the Pipeline for Domain Extraction
|
| 328 |
# Apply the text_processing_for_domain function to the DataFrame
|
| 329 |
processed_df['Processed_ProblemDescription_forDomainExtraction'] = processed_df['Problem_Description'].apply(text_processing_for_domain)
|
|
|
|
|
|
|
| 330 |
# Domain Clustering
|
| 331 |
domain_df, optimal_n_clusters = extract_problem_domains(processed_df)
|
| 332 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 333 |
|
| 334 |
# problem_clusters, problem_model = perform_clustering(processed_df['Problem_Description'], n_clusters=10)
|
| 335 |
# location_clusters, location_model = perform_clustering(processed_df['Geographical_Location'], n_clusters=5)
|
|
@@ -338,30 +347,37 @@ def nlp_pipeline(original_df):
|
|
| 338 |
|
| 339 |
|
| 340 |
# return processed_df
|
| 341 |
-
return domain_df
|
| 342 |
|
| 343 |
|
| 344 |
def process_excel(file):
|
|
|
|
|
|
|
| 345 |
try:
|
| 346 |
# Ensure the file path is correct
|
|
|
|
| 347 |
file_path = file.name if hasattr(file, 'name') else file
|
| 348 |
# Read the Excel file
|
| 349 |
df = pd.read_excel(file_path)
|
| 350 |
|
| 351 |
# Process the DataFrame
|
| 352 |
-
|
|
|
|
| 353 |
|
| 354 |
# output_file = "Output_ProjectProposals.xlsx"
|
| 355 |
output_file = "Output_Proposals.xlsx"
|
| 356 |
result_df.to_excel(output_file, index=False)
|
| 357 |
-
|
| 358 |
-
|
|
|
|
| 359 |
|
| 360 |
except Exception as e:
|
| 361 |
# return str(e) # Return the error message
|
| 362 |
-
error_message = f"Error processing file: {str(e)}"
|
| 363 |
-
print(error_message) # Log the error
|
| 364 |
-
|
|
|
|
|
|
|
| 365 |
|
| 366 |
|
| 367 |
|
|
|
|
| 327 |
# Starting the Pipeline for Domain Extraction
|
| 328 |
# Apply the text_processing_for_domain function to the DataFrame
|
| 329 |
processed_df['Processed_ProblemDescription_forDomainExtraction'] = processed_df['Problem_Description'].apply(text_processing_for_domain)
|
| 330 |
+
|
| 331 |
+
|
| 332 |
# Domain Clustering
|
| 333 |
domain_df, optimal_n_clusters = extract_problem_domains(processed_df)
|
| 334 |
|
| 335 |
+
try:
|
| 336 |
+
domain_df, optimal_n_clusters = extract_problem_domains(df, method='tfidf_kmeans')
|
| 337 |
+
print(f"Optimal clusters: {optimal_clusters}")
|
| 338 |
+
print(result_df.head())
|
| 339 |
+
except Exception as e:
|
| 340 |
+
print(f"Error in extract_problem_domains: {e}")
|
| 341 |
+
|
| 342 |
|
| 343 |
# problem_clusters, problem_model = perform_clustering(processed_df['Problem_Description'], n_clusters=10)
|
| 344 |
# location_clusters, location_model = perform_clustering(processed_df['Geographical_Location'], n_clusters=5)
|
|
|
|
| 347 |
|
| 348 |
|
| 349 |
# return processed_df
|
| 350 |
+
return domain_df, "NLP Pipeline"
|
| 351 |
|
| 352 |
|
| 353 |
def process_excel(file):
    """Run the NLP pipeline on an uploaded Excel file and save the result.

    Args:
        file: Either an object exposing a ``name`` attribute that holds the
            file path, or the path itself.

    Returns:
        A ``(output_file, log)`` tuple. On success ``output_file`` is the
        path of the written workbook ("Output_Proposals.xlsx") and ``log``
        is the newline-joined progress messages. On any failure
        ``output_file`` is ``None`` and ``log`` ends with the error text.
    """
    console_messages = []

    try:
        console_messages.append("Reading the uploaded Excel file...")
        # Accept both file-like upload objects and plain paths.
        file_path = file.name if hasattr(file, 'name') else file
        df = pd.read_excel(file_path)

        console_messages.append("Processing the DataFrame...")
        # nlp_pipeline returns (DataFrame, status string). Bind the status to
        # its own name: unpacking it into console_messages would replace the
        # list with a str and make every later .append() raise
        # AttributeError, including the one in the except handler below.
        result_df, pipeline_message = nlp_pipeline(df)
        console_messages.append(str(pipeline_message))

        output_file = "Output_Proposals.xlsx"
        result_df.to_excel(output_file, index=False)

        console_messages.append("Processing completed. Ready for download.")
        return output_file, "\n".join(console_messages)

    except Exception as e:
        # Top-level boundary: report the failure to the caller as text
        # instead of propagating it.
        console_messages.append(f"Error during processing: {str(e)}")
        return None, "\n".join(console_messages)
|
| 381 |
|
| 382 |
|
| 383 |
|