Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -462,15 +462,15 @@ def nlp_pipeline(original_df):
|
|
| 462 |
# Create cluster dataframes
|
| 463 |
budget_cluster_df, problem_cluster_df = create_cluster_dataframes(processed_df)
|
| 464 |
|
| 465 |
-
return processed_df, budget_cluster_df, problem_cluster_df, location_clusters, problem_clusters
|
| 466 |
|
| 467 |
-
# #
|
| 468 |
-
#
|
| 469 |
-
#
|
| 470 |
-
|
| 471 |
|
| 472 |
-
|
| 473 |
-
|
| 474 |
|
| 475 |
|
| 476 |
|
|
@@ -490,26 +490,22 @@ def process_excel(file):
|
|
| 490 |
try:
|
| 491 |
# Process the DataFrame
|
| 492 |
console_messages.append("Processing the DataFrame...")
|
| 493 |
-
|
| 494 |
-
processed_df, budget_cluster_df, problem_cluster_df, location_clusters, problem_clusters = nlp_pipeline(df)
|
| 495 |
|
| 496 |
output_filename = "OutPut_PPs.xlsx"
|
| 497 |
with pd.ExcelWriter(output_filename) as writer:
|
| 498 |
-
|
| 499 |
-
|
| 500 |
-
|
| 501 |
-
|
| 502 |
-
|
| 503 |
|
| 504 |
budget_cluster_df.to_excel(writer, sheet_name='Financial_Weights')
|
|
|
|
| 505 |
processed_df.to_excel(writer, sheet_name='Input_Processed', index=False)
|
| 506 |
|
| 507 |
|
| 508 |
-
if isinstance(problem_cluster_df, pd.DataFrame):
|
| 509 |
-
problem_cluster_df.to_excel(writer, sheet_name='Problem_Descriptions')
|
| 510 |
-
else:
|
| 511 |
-
console_messages.append("Converting Location Clusters to df")
|
| 512 |
-
pd.DataFrame(problem_cluster_df).to_excel(writer, sheet_name='Problem_Descriptions', index=False)
|
| 513 |
|
| 514 |
# # Ensure location_clusters and problem_clusters are in DataFrame format
|
| 515 |
# if isinstance(location_clusters, pd.DataFrame):
|
|
|
|
| 462 |
# Create cluster dataframes
|
| 463 |
budget_cluster_df, problem_cluster_df = create_cluster_dataframes(processed_df)
|
| 464 |
|
| 465 |
+
# return processed_df, budget_cluster_df, problem_cluster_df, location_clusters, problem_clusters
|
| 466 |
|
| 467 |
+
# # Generate project proposals
|
| 468 |
+
# location_clusters = dict(enumerate(processed_df['Location_Category_Words'].unique()))
|
| 469 |
+
# problem_clusters = dict(enumerate(processed_df['Problem_Category_Words'].unique()))
|
| 470 |
+
project_proposals = create_project_proposals(budget_cluster_df, problem_cluster_df, location_clusters, problem_clusters)
|
| 471 |
|
| 472 |
+
console_messages.append("NLP pipeline completed.")
|
| 473 |
+
return processed_df, budget_cluster_df, problem_cluster_df, project_proposals, location_clusters, problem_clusters
|
| 474 |
|
| 475 |
|
| 476 |
|
|
|
|
| 490 |
try:
|
| 491 |
# Process the DataFrame
|
| 492 |
console_messages.append("Processing the DataFrame...")
|
| 493 |
+
processed_df, budget_cluster_df, problem_cluster_df, project_proposals, location_clusters, problem_clusters = nlp_pipeline(df)
|
| 494 |
+
# processed_df, budget_cluster_df, problem_cluster_df, location_clusters, problem_clusters = nlp_pipeline(df)
|
| 495 |
|
| 496 |
output_filename = "OutPut_PPs.xlsx"
|
| 497 |
with pd.ExcelWriter(output_filename) as writer:
|
| 498 |
+
### Convert project_proposals dictionary to DataFrame
|
| 499 |
+
project_proposals_df = pd.DataFrame.from_dict(project_proposals, orient='index', columns=['Solutions Proposed'])
|
| 500 |
+
project_proposals_df.index.names = ['Location_Cluster', 'Problem_Cluster']
|
| 501 |
+
project_proposals_df.reset_index(inplace=True)
|
| 502 |
+
project_proposals_df.to_excel(writer, sheet_name='Project_Proposals', index=False)
|
| 503 |
|
| 504 |
budget_cluster_df.to_excel(writer, sheet_name='Financial_Weights')
|
| 505 |
+
problem_cluster_df.to_excel(writer, sheet_name='Problem_Descriptions')
|
| 506 |
processed_df.to_excel(writer, sheet_name='Input_Processed', index=False)
|
| 507 |
|
| 508 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 509 |
|
| 510 |
# # Ensure location_clusters and problem_clusters are in DataFrame format
|
| 511 |
# if isinstance(location_clusters, pd.DataFrame):
|