Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -234,7 +234,7 @@ def extract_problem_domains(df,
|
|
| 234 |
|
| 235 |
# console_messages.append("Returning from Problem Domain Extraction function.")
|
| 236 |
console_messages.append("Problem Domain Extraction completed.")
|
| 237 |
-
return df, optimal_n_clusters
|
| 238 |
|
| 239 |
|
| 240 |
|
|
@@ -321,7 +321,7 @@ def extract_location_clusters(df,
|
|
| 321 |
|
| 322 |
df = df.drop(text_column, axis=1)
|
| 323 |
console_messages.append("Location Clustering completed.")
|
| 324 |
-
return df, optimal_n_clusters
|
| 325 |
|
| 326 |
|
| 327 |
|
|
@@ -434,7 +434,7 @@ def nlp_pipeline(original_df):
|
|
| 434 |
|
| 435 |
# Domain Clustering
|
| 436 |
try:
|
| 437 |
-
processed_df, optimal_n_clusters = extract_problem_domains(processed_df)
|
| 438 |
console_messages.append(f"Optimal clusters for Domain extraction: {optimal_n_clusters}")
|
| 439 |
except Exception as e:
|
| 440 |
console_messages.append(f"Error in extract_problem_domains: {str(e)}")
|
|
@@ -449,7 +449,7 @@ def nlp_pipeline(original_df):
|
|
| 449 |
|
| 450 |
# Location Clustering
|
| 451 |
try:
|
| 452 |
-
processed_df, optimal_n_clusters = extract_location_clusters(processed_df)
|
| 453 |
console_messages.append(f"Optimal clusters for Location extraction: {optimal_n_clusters}")
|
| 454 |
except Exception as e:
|
| 455 |
console_messages.append(f"Error in extract_location_clusters: {str(e)}")
|
|
@@ -462,13 +462,13 @@ def nlp_pipeline(original_df):
|
|
| 462 |
# Create cluster dataframes
|
| 463 |
budget_cluster_df, problem_cluster_df = create_cluster_dataframes(processed_df)
|
| 464 |
|
| 465 |
-
# Generate project proposals
|
| 466 |
-
location_clusters = dict(enumerate(processed_df['Location_Category_Words'].unique()))
|
| 467 |
-
problem_clusters = dict(enumerate(processed_df['Problem_Category_Words'].unique()))
|
| 468 |
project_proposals = create_project_proposals(budget_cluster_df, problem_cluster_df, location_clusters, problem_clusters)
|
| 469 |
|
| 470 |
console_messages.append("NLP pipeline completed.")
|
| 471 |
-
return processed_df, budget_cluster_df, problem_cluster_df, project_proposals
|
| 472 |
|
| 473 |
|
| 474 |
|
|
@@ -488,11 +488,16 @@ def process_excel(file):
|
|
| 488 |
try:
|
| 489 |
# Process the DataFrame
|
| 490 |
console_messages.append("Processing the DataFrame...")
|
| 491 |
-
processed_df, budget_cluster_df, problem_cluster_df, project_proposals = nlp_pipeline(df)
|
| 492 |
|
| 493 |
output_filename = "OutPut_PPs.xlsx"
|
| 494 |
with pd.ExcelWriter(output_filename) as writer:
|
| 495 |
-
project_proposals
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 496 |
budget_cluster_df.to_excel(writer, sheet_name='Financial_Weights')
|
| 497 |
problem_cluster_df.to_excel(writer, sheet_name='Problem_Descriptions')
|
| 498 |
processed_df.to_excel(writer, sheet_name='Input_Processed', index=False)
|
|
|
|
| 234 |
|
| 235 |
# console_messages.append("Returning from Problem Domain Extraction function.")
|
| 236 |
console_messages.append("Problem Domain Extraction completed.")
|
| 237 |
+
return df, optimal_n_clusters, cluster_representations
|
| 238 |
|
| 239 |
|
| 240 |
|
|
|
|
| 321 |
|
| 322 |
df = df.drop(text_column, axis=1)
|
| 323 |
console_messages.append("Location Clustering completed.")
|
| 324 |
+
return df, optimal_n_clusters, cluster_representations
|
| 325 |
|
| 326 |
|
| 327 |
|
|
|
|
| 434 |
|
| 435 |
# Domain Clustering
|
| 436 |
try:
|
| 437 |
+
processed_df, optimal_n_clusters, problem_clusters = extract_problem_domains(processed_df)
|
| 438 |
console_messages.append(f"Optimal clusters for Domain extraction: {optimal_n_clusters}")
|
| 439 |
except Exception as e:
|
| 440 |
console_messages.append(f"Error in extract_problem_domains: {str(e)}")
|
|
|
|
| 449 |
|
| 450 |
# Location Clustering
|
| 451 |
try:
|
| 452 |
+
processed_df, optimal_n_clusters, location_clusters = extract_location_clusters(processed_df)
|
| 453 |
console_messages.append(f"Optimal clusters for Location extraction: {optimal_n_clusters}")
|
| 454 |
except Exception as e:
|
| 455 |
console_messages.append(f"Error in extract_location_clusters: {str(e)}")
|
|
|
|
| 462 |
# Create cluster dataframes
|
| 463 |
budget_cluster_df, problem_cluster_df = create_cluster_dataframes(processed_df)
|
| 464 |
|
| 465 |
+
# # Generate project proposals
|
| 466 |
+
# location_clusters = dict(enumerate(processed_df['Location_Category_Words'].unique()))
|
| 467 |
+
# problem_clusters = dict(enumerate(processed_df['Problem_Category_Words'].unique()))
|
| 468 |
project_proposals = create_project_proposals(budget_cluster_df, problem_cluster_df, location_clusters, problem_clusters)
|
| 469 |
|
| 470 |
console_messages.append("NLP pipeline completed.")
|
| 471 |
+
return processed_df, budget_cluster_df, problem_cluster_df, project_proposals, location_clusters, problem_clusters
|
| 472 |
|
| 473 |
|
| 474 |
|
|
|
|
| 488 |
try:
|
| 489 |
# Process the DataFrame
|
| 490 |
console_messages.append("Processing the DataFrame...")
|
| 491 |
+
processed_df, budget_cluster_df, problem_cluster_df, project_proposals, location_clusters, problem_clusters = nlp_pipeline(df)
|
| 492 |
|
| 493 |
output_filename = "OutPut_PPs.xlsx"
|
| 494 |
with pd.ExcelWriter(output_filename) as writer:
|
| 495 |
+
# Convert project_proposals dictionary to DataFrame
|
| 496 |
+
project_proposals_df = pd.DataFrame.from_dict(project_proposals, orient='index', columns=['Solutions Proposed'])
|
| 497 |
+
project_proposals_df.index.names = ['Location_Cluster', 'Problem_Cluster']
|
| 498 |
+
project_proposals_df.reset_index(inplace=True)
|
| 499 |
+
project_proposals_df.to_excel(writer, sheet_name='Project_Proposals', index=False)
|
| 500 |
+
|
| 501 |
budget_cluster_df.to_excel(writer, sheet_name='Financial_Weights')
|
| 502 |
problem_cluster_df.to_excel(writer, sheet_name='Problem_Descriptions')
|
| 503 |
processed_df.to_excel(writer, sheet_name='Input_Processed', index=False)
|