Spaces:

SantanuBanerjee
/

TaxDirection

Sleeping

App Files Community

SantanuBanerjee committed on Aug 6, 2024

Commit

bd96e0d

verified ·

1 Parent(s): 38b2a27

Update app.py

Browse files

Files changed (1) hide show

app.py +15 -19

app.py CHANGED Viewed

@@ -462,15 +462,15 @@ def nlp_pipeline(original_df):
     # Create cluster dataframes
     budget_cluster_df, problem_cluster_df = create_cluster_dataframes(processed_df)
-    return processed_df, budget_cluster_df, problem_cluster_df, location_clusters, problem_clusters
-    # # # Generate project proposals
-    # # location_clusters = dict(enumerate(processed_df['Location_Category_Words'].unique()))
-    # # problem_clusters = dict(enumerate(processed_df['Problem_Category_Words'].unique()))
-    # project_proposals = create_project_proposals(budget_cluster_df, problem_cluster_df, location_clusters, problem_clusters)
-    # console_messages.append("NLP pipeline completed.")
-    # return processed_df, budget_cluster_df, problem_cluster_df, project_proposals, location_clusters, problem_clusters
@@ -490,26 +490,22 @@ def process_excel(file):
     try:
         # Process the DataFrame
         console_messages.append("Processing the DataFrame...")
-        # processed_df, budget_cluster_df, problem_cluster_df, project_proposals, location_clusters, problem_clusters = nlp_pipeline(df)
-        processed_df, budget_cluster_df, problem_cluster_df, location_clusters, problem_clusters  = nlp_pipeline(df)
         output_filename = "OutPut_PPs.xlsx"
         with pd.ExcelWriter(output_filename) as writer:
-            #### Convert project_proposals dictionary to DataFrame
-            # project_proposals_df = pd.DataFrame.from_dict(project_proposals, orient='index', columns=['Solutions Proposed'])
-            # project_proposals_df.index.names = ['Location_Cluster', 'Problem_Cluster']
-            # project_proposals_df.reset_index(inplace=True)
-            # project_proposals_df.to_excel(writer, sheet_name='Project_Proposals', index=False)
             budget_cluster_df.to_excel(writer, sheet_name='Financial_Weights')
             processed_df.to_excel(writer, sheet_name='Input_Processed', index=False)
-            if isinstance(problem_cluster_df, pd.DataFrame):
-                problem_cluster_df.to_excel(writer, sheet_name='Problem_Descriptions')
-            else:
-                console_messages.append("Converting Location Clusters to df")
-                pd.DataFrame(problem_cluster_df).to_excel(writer, sheet_name='Problem_Descriptions', index=False)
             # # Ensure location_clusters and problem_clusters are in DataFrame format
             # if isinstance(location_clusters, pd.DataFrame):

     # Create cluster dataframes
     budget_cluster_df, problem_cluster_df = create_cluster_dataframes(processed_df)
+    # return processed_df, budget_cluster_df, problem_cluster_df, location_clusters, problem_clusters
+    # # Generate project proposals
+    # location_clusters = dict(enumerate(processed_df['Location_Category_Words'].unique()))
+    # problem_clusters = dict(enumerate(processed_df['Problem_Category_Words'].unique()))
+    project_proposals = create_project_proposals(budget_cluster_df, problem_cluster_df, location_clusters, problem_clusters)
+    console_messages.append("NLP pipeline completed.")
+    return processed_df, budget_cluster_df, problem_cluster_df, project_proposals, location_clusters, problem_clusters
     try:
         # Process the DataFrame
         console_messages.append("Processing the DataFrame...")
+        processed_df, budget_cluster_df, problem_cluster_df, project_proposals, location_clusters, problem_clusters = nlp_pipeline(df)
+        # processed_df, budget_cluster_df, problem_cluster_df, location_clusters, problem_clusters  = nlp_pipeline(df)
         output_filename = "OutPut_PPs.xlsx"
         with pd.ExcelWriter(output_filename) as writer:
+            ### Convert project_proposals dictionary to DataFrame
+            project_proposals_df = pd.DataFrame.from_dict(project_proposals, orient='index', columns=['Solutions Proposed'])
+            project_proposals_df.index.names = ['Location_Cluster', 'Problem_Cluster']
+            project_proposals_df.reset_index(inplace=True)
+            project_proposals_df.to_excel(writer, sheet_name='Project_Proposals', index=False)
             budget_cluster_df.to_excel(writer, sheet_name='Financial_Weights')
+            problem_cluster_df.to_excel(writer, sheet_name='Problem_Descriptions')
             processed_df.to_excel(writer, sheet_name='Input_Processed', index=False)
             # # Ensure location_clusters and problem_clusters are in DataFrame format
             # if isinstance(location_clusters, pd.DataFrame):