Spaces:

SantanuBanerjee
/

TaxDirection

Sleeping

App Files Files Community

SantanuBanerjee committed on Aug 6, 2024

Commit

b59ee01

verified ·

1 Parent(s): ac713f6

Update app.py

Browse files

Files changed (1) hide show

app.py +29 -29

app.py CHANGED Viewed

@@ -359,22 +359,18 @@ def create_cluster_dataframes(processed_df):
     return budget_cluster_df, problem_cluster_df
 from transformers import GPTNeoForCausalLM, GPT2Tokenizer
-def generate_project_proposal(problem_descriptions, location, problem_domain):
     print("Trying to access gpt-neo-1.3B")
-    print("problem_descriptions: ", problem_descriptions)
-    print("location: ", location)
-    print("problem_domain: ", problem_domain)
-    model = GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt-neo-1.3B")
-    tokenizer = GPT2Tokenizer.from_pretrained("EleutherAI/gpt-neo-1.3B")
-    # Prepare the prompt
-    problems_summary = "; ".join(problem_descriptions[:3])  # Limit to first 3 for brevity
-    # problems_summary = "; ".join(problem_descriptions)
-    # prompt = f"Generate a project proposal for the following:\n\nLocation: {location}\nProblem Domain: {problem_domain}\nProblems: {problems_summary}\nBudget: ${financial_weight:.2f}\n\nProject Proposal:"
-    prompt = f"Generate a solution oriented project proposal for the following:\n\nLocation: {location}\nProblem Domain: {problem_domain}\nProblems: {problems_summary}\n\nProject Proposal:"
-    # Generate the proposal
     try:
         input_ids = tokenizer.encode(prompt, return_tensors="pt")
         print("Input IDs shape:", input_ids.shape)
@@ -417,14 +413,21 @@ def create_project_proposals(budget_cluster_df, problem_cluster_df, location_clu
             print("problem_descriptions: ",problem_descriptions)
             if problem_descriptions and not pd.isna(problem_descriptions):
                 print(f"\nGenerating proposal for location: {location}, problem domain: {problem_domain}")
                 # console_messages.append(f"\nGenerating proposal for location: {location}, problem domain: {problem_domain}")
-                proposal = generate_project_proposal(
-                    problem_descriptions,
-                    location,
-                    problem_domain)
                 proposals[(loc, prob)] = proposal
                 print("Generated Proposal: ", proposal)
             else:
                 print(f"Skipping empty problem descriptions for location: {location}, problem domain: {problem_domain}")
@@ -509,9 +512,6 @@ def nlp_pipeline(original_df):
     except Exception as e:
         console_messages.append(f"Error in extract_location_clusters: {str(e)}")
     console_messages.append("NLP pipeline for location extraction completed.")
     # Create cluster dataframes
@@ -525,8 +525,8 @@ def nlp_pipeline(original_df):
     # # Generate project proposals
     # location_clusters = dict(enumerate(processed_df['Location_Category_Words'].unique()))
     # problem_clusters = dict(enumerate(processed_df['Problem_Category_Words'].unique()))
-    print("\n location_clusters_2: ", location_clusters)
-    print("\n problem_clusters_2: ", problem_clusters)
     project_proposals = create_project_proposals(budget_cluster_df, problem_cluster_df, location_clusters, problem_clusters)
     console_messages.append("NLP pipeline completed.")
@@ -556,11 +556,11 @@ def process_excel(file):
         output_filename = "OutPut_PPs.xlsx"
         with pd.ExcelWriter(output_filename) as writer:
-            # ### Convert project_proposals dictionary to DataFrame
-            # project_proposals_df = pd.DataFrame.from_dict(project_proposals, orient='index', columns=['Solutions Proposed'])
-            # project_proposals_df.index.names = ['Location_Cluster', 'Problem_Cluster']
-            # project_proposals_df.reset_index(inplace=True)
-            # project_proposals_df.to_excel(writer, sheet_name='Project_Proposals', index=False)
             budget_cluster_df.to_excel(writer, sheet_name='Financial_Weights')
             problem_cluster_df.to_excel(writer, sheet_name='Problem_Descriptions')

     return budget_cluster_df, problem_cluster_df
 from transformers import GPTNeoForCausalLM, GPT2Tokenizer
+def generate_project_proposal(prompt):
     print("Trying to access gpt-neo-1.3B")
+    print("prompt: \t", prompt)
+    try:
+        # Generate the proposal
+        model = GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt-neo-1.3B")
+        tokenizer = GPT2Tokenizer.from_pretrained("EleutherAI/gpt-neo-1.3B")
+    except Exception as e:
+        print("Error loading models:", str(e))
+        console_messages.append("\n Error Loading Models")
+        return prompt
     try:
         input_ids = tokenizer.encode(prompt, return_tensors="pt")
         print("Input IDs shape:", input_ids.shape)
             print("problem_descriptions: ",problem_descriptions)
             if problem_descriptions and not pd.isna(problem_descriptions):
                 print(f"\nGenerating proposal for location: {location}, problem domain: {problem_domain}")
                 # console_messages.append(f"\nGenerating proposal for location: {location}, problem domain: {problem_domain}")
+                # Prepare the prompt
+                problems_summary = "; ".join(problem_descriptions[:3])  # Limit to first 3 for brevity
+                # problems_summary = "; ".join(problem_descriptions)
+                # prompt = f"Generate a project proposal for the following:\n\nLocation: {location}\nProblem Domain: {problem_domain}\nProblems: {problems_summary}\nBudget: ${financial_weight:.2f}\n\nProject Proposal:"
+                prompt = f"Generate a solution oriented project proposal for the following:\n\nLocation: {location}\nProblem Domain: {problem_domain}\nProblems: {problems_summary}\n\nProject Proposal:"
+                proposal = generate_project_proposal(prompt)
                 proposals[(loc, prob)] = proposal
                 print("Generated Proposal: ", proposal)
             else:
                 print(f"Skipping empty problem descriptions for location: {location}, problem domain: {problem_domain}")
     except Exception as e:
         console_messages.append(f"Error in extract_location_clusters: {str(e)}")
     console_messages.append("NLP pipeline for location extraction completed.")
     # Create cluster dataframes
     # # Generate project proposals
     # location_clusters = dict(enumerate(processed_df['Location_Category_Words'].unique()))
     # problem_clusters = dict(enumerate(processed_df['Problem_Category_Words'].unique()))
+    # print("\n location_clusters_2: ", location_clusters)
+    # print("\n problem_clusters_2: ", problem_clusters)
     project_proposals = create_project_proposals(budget_cluster_df, problem_cluster_df, location_clusters, problem_clusters)
     console_messages.append("NLP pipeline completed.")
         output_filename = "OutPut_PPs.xlsx"
         with pd.ExcelWriter(output_filename) as writer:
+            ### Convert project_proposals dictionary to DataFrame
+            project_proposals_df = pd.DataFrame.from_dict(project_proposals, orient='index', columns=['Solutions Proposed'])
+            project_proposals_df.index.names = ['Location_Cluster', 'Problem_Cluster']
+            project_proposals_df.reset_index(inplace=True)
+            project_proposals_df.to_excel(writer, sheet_name='Project_Proposals', index=False)
             budget_cluster_df.to_excel(writer, sheet_name='Financial_Weights')
             problem_cluster_df.to_excel(writer, sheet_name='Problem_Descriptions')