Spaces:

SantanuBanerjee
/

TaxDirection

Sleeping

App Files Files Community

SantanuBanerjee committed on Aug 6, 2024

Commit

3a814a1

verified ·

1 Parent(s): 4259c64

Update app.py

Browse files

Files changed (1) hide show

app.py +101 -10

app.py CHANGED Viewed

@@ -341,6 +341,81 @@ def extract_location_clusters(df,
 def nlp_pipeline(original_df):
     console_messages.append("Starting NLP pipeline...")
@@ -380,12 +455,25 @@ def nlp_pipeline(original_df):
         console_messages.append(f"Error in extract_location_clusters: {str(e)}")
     console_messages.append("NLP pipeline for location extraction completed.")
-    console_messages.append("NLP pipeline completed.")
-    return processed_df
@@ -400,14 +488,17 @@ def process_excel(file):
     try:
         # Process the DataFrame
         console_messages.append("Processing the DataFrame...")
-        result_df = nlp_pipeline(df)
-        # output_file = "Output_ProjectProposals.xlsx"
-        output_file = "Output_Proposals.xlsx"
-        result_df.to_excel(output_file, index=False)
         console_messages.append("Processing completed. Ready for download.")
-        return output_file, "\n".join(console_messages)  # Return the processed DataFrame as Excel file
     except Exception as e:
         # return str(e)  # Return the error message
@@ -422,8 +513,8 @@ def process_excel(file):
 example_files = []
-# example_files.append('#TaxDirection (Responses)_BasicExample.xlsx')
-example_files.append('#TaxDirection (Responses)_IntermediateExample.xlsx')
 # example_files.append('#TaxDirection (Responses)_UltimateExample.xlsx')

+def create_cluster_dataframes(processed_df):
+    # Create a dataframe for Financial Weights
+    budget_cluster_df = processed_df.pivot_table(
+        values='Financial_Weight',
+        index='Location_Cluster',
+        columns='Problem_Cluster',
+        aggfunc='sum',
+        fill_value=0)
+    # Create a dataframe for Problem Descriptions
+    problem_cluster_df = processed_df.groupby(['Location_Cluster', 'Problem_Cluster'])['Problem_Description'].apply(list).unstack()
+    return budget_cluster_df, problem_cluster_df
+from transformers import GPTNeoForCausalLM, GPT2Tokenizer
+def generate_project_proposal(problem_descriptions, location, problem_domain):
+    model = GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt-neo-1.3B")
+    tokenizer = GPT2Tokenizer.from_pretrained("EleutherAI/gpt-neo-1.3B")
+    # Prepare the prompt
+    problems_summary = "; ".join(problem_descriptions[:3])  # Limit to first 3 for brevity
+    # problems_summary = "; ".join(problem_descriptions)
+    # prompt = f"Generate a project proposal for the following:\n\nLocation: {location}\nProblem Domain: {problem_domain}\nProblems: {problems_summary}\nBudget: ${financial_weight:.2f}\n\nProject Proposal:"
+    prompt = f"Generate a solution oriented project proposal for the following:\n\nLocation: {location}\nProblem Domain: {problem_domain}\nProblems: {problems_summary}\n\nProject Proposal:"
+    # Generate the proposal
+    input_ids = tokenizer.encode(prompt, return_tensors="pt")
+    output = model.generate(
+        input_ids,
+        max_length=300,
+        num_return_sequences=1,
+        no_repeat_ngram_size=2,
+        temperature=0.75)
+    proposal = tokenizer.decode(output[0], skip_special_tokens=True)
+    return proposal
+def create_project_proposals(budget_cluster_df, problem_cluster_df, location_clusters, problem_clusters):
+    proposals = {}
+    for loc in budget_cluster_df.index:
+        for prob in budget_cluster_df.columns:
+            location = ", ".join(location_clusters[loc])
+            problem_domain = ", ".join(problem_clusters[prob])
+            problem_descriptions = problem_cluster_df.loc[loc, prob]
+            if problem_descriptions:
+                proposal = generate_project_proposal(
+                    problem_descriptions,
+                    location,
+                    problem_domain)
+                proposals[(loc, prob)] = proposal
+    return proposals
 def nlp_pipeline(original_df):
     console_messages.append("Starting NLP pipeline...")
         console_messages.append(f"Error in extract_location_clusters: {str(e)}")
     console_messages.append("NLP pipeline for location extraction completed.")
+    # Create cluster dataframes
+    budget_cluster_df, problem_cluster_df = create_cluster_dataframes(processed_df)
+    # Generate project proposals
+    location_clusters = dict(enumerate(processed_df['Location_Category_Words'].unique()))
+    problem_clusters = dict(enumerate(processed_df['Problem_Category_Words'].unique()))
+    project_proposals = create_project_proposals(budget_cluster_df, problem_cluster_df, location_clusters, problem_clusters)
+    console_messages.append("NLP pipeline completed.")
+    return processed_df, budget_cluster_df, problem_cluster_df, project_proposals
     try:
         # Process the DataFrame
         console_messages.append("Processing the DataFrame...")
+        processed_df, budget_cluster_df, problem_cluster_df, project_proposals = nlp_pipeline(df)
+        output_filename = "OutPut_PPs.xlsx"
+        with pd.ExcelWriter(output_filename) as writer:
+            project_proposals.to_excel(writer, sheet_name='Project_Proposals', index=False)
+            budget_cluster_df.to_excel(writer, sheet_name='Financial_Weights')
+            problem_cluster_df.to_excel(writer, sheet_name='Problem_Descriptions')
+            processed_df.to_excel(writer, sheet_name='Input_Processed', index=False)
         console_messages.append("Processing completed. Ready for download.")
+        return output_filename, "\n".join(console_messages)  # Return the processed DataFrame as Excel file
     except Exception as e:
         # return str(e)  # Return the error message
 example_files = []
+example_files.append('#TaxDirection (Responses)_BasicExample.xlsx')
+# example_files.append('#TaxDirection (Responses)_IntermediateExample.xlsx')
 # example_files.append('#TaxDirection (Responses)_UltimateExample.xlsx')