Spaces:

SantanuBanerjee
/

TaxDirection

Sleeping

App Files Files Community

SantanuBanerjee committed on Aug 4, 2024

Commit

b446f1b

verified ·

1 Parent(s): e636253

Update app.py

Browse files

Files changed (1) hide show

app.py +17 -13

app.py CHANGED Viewed

@@ -1,7 +1,8 @@
 import gradio as gr
 import pandas as pd
-def data_pre_processing(file_responses):
     # Financial weights can be anything (ultimately the row-wise weights are aggregated and the corresponding fractions are obtained from that row's total tax paid)
     try: # Define the columns to be processed
@@ -62,11 +63,14 @@ def data_pre_processing(file_responses):
         # Different return can be used to check the processing
         # return file_responses
-        return merged_dataset
     except Exception as e:
-        return str(e)
@@ -225,8 +229,8 @@ def extract_problem_domains(df,
                             text_column='Problem_Description',
                             cluster_range=(10, 50),
                             top_words=17,
-                            # method='sentence_transformers'
-                            method='tfidf_kmeans'
                            ):
@@ -317,11 +321,12 @@ def extract_problem_domains(df,
-def nlp_pipeline(original_df):
     # Data Preprocessing
-    processed_df = data_pre_processing(original_df) # merged_dataset
     # Starting the Pipeline for Domain Extraction
@@ -344,10 +349,9 @@ def nlp_pipeline(original_df):
     # location_clusters, location_model = perform_clustering(processed_df['Geographical_Location'], n_clusters=5)
     # return processed_df
-    return domain_df, "NLP Pipeline"
 def process_excel(file):
@@ -362,7 +366,7 @@ def process_excel(file):
         # Process the DataFrame
         console_messages.append("Processing the DataFrame...")
-        result_df, console_messages = nlp_pipeline(df)
         # output_file = "Output_ProjectProposals.xlsx"
         output_file = "Output_Proposals.xlsx"

 import gradio as gr
 import pandas as pd
+def data_pre_processing(file_responses, console_messages):
+    console_messages.append("Starting data pre-processing...")
     # Financial weights can be anything (ultimately the row-wise weights are aggregated and the corresponding fractions are obtained from that row's total tax paid)
     try: # Define the columns to be processed
         # Different return can be used to check the processing
+        console_messages.append("Data pre-processing completed.")
         # return file_responses
+        return merged_dataset, console_messages
     except Exception as e:
+        console_messages.append(f"Error during data pre-processing: {str(e)}")
+        # return str(e), console_messages
+        return None, console_messages
                             text_column='Problem_Description',
                             cluster_range=(10, 50),
                             top_words=17,
+                            method='sentence_transformers'
+                            # method='tfidf_kmeans'
                            ):
+# def nlp_pipeline(original_df):
+def nlp_pipeline(original_df, console_messages):
+    console_messages.append("Starting NLP pipeline...")
     # Data Preprocessing
+    processed_df, console_messages = data_pre_processing(original_df, console_messages) # merged_dataset
     # Starting the Pipeline for Domain Extraction
     # location_clusters, location_model = perform_clustering(processed_df['Geographical_Location'], n_clusters=5)
+    console_messages.append("NLP pipeline completed.")
     # return processed_df
+    return domain_df, console_messages
 def process_excel(file):
         # Process the DataFrame
         console_messages.append("Processing the DataFrame...")
+        result_df, console_messages = nlp_pipeline(df, console_messages)
         # output_file = "Output_ProjectProposals.xlsx"
         output_file = "Output_Proposals.xlsx"