Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -359,22 +359,18 @@ def create_cluster_dataframes(processed_df):
|
|
| 359 |
return budget_cluster_df, problem_cluster_df
|
| 360 |
|
| 361 |
from transformers import GPTNeoForCausalLM, GPT2Tokenizer
|
| 362 |
-
def generate_project_proposal(
|
| 363 |
print("Trying to access gpt-neo-1.3B")
|
| 364 |
-
print("
|
| 365 |
-
|
| 366 |
-
|
| 367 |
-
|
| 368 |
-
|
| 369 |
-
|
| 370 |
-
|
| 371 |
-
|
| 372 |
-
|
| 373 |
-
|
| 374 |
-
# prompt = f"Generate a project proposal for the following:\n\nLocation: {location}\nProblem Domain: {problem_domain}\nProblems: {problems_summary}\nBudget: ${financial_weight:.2f}\n\nProject Proposal:"
|
| 375 |
-
prompt = f"Generate a solution oriented project proposal for the following:\n\nLocation: {location}\nProblem Domain: {problem_domain}\nProblems: {problems_summary}\n\nProject Proposal:"
|
| 376 |
-
|
| 377 |
-
# Generate the proposal
|
| 378 |
try:
|
| 379 |
input_ids = tokenizer.encode(prompt, return_tensors="pt")
|
| 380 |
print("Input IDs shape:", input_ids.shape)
|
|
@@ -417,14 +413,21 @@ def create_project_proposals(budget_cluster_df, problem_cluster_df, location_clu
|
|
| 417 |
print("problem_descriptions: ",problem_descriptions)
|
| 418 |
|
| 419 |
if problem_descriptions and not pd.isna(problem_descriptions):
|
|
|
|
| 420 |
print(f"\nGenerating proposal for location: {location}, problem domain: {problem_domain}")
|
|
|
|
| 421 |
# console_messages.append(f"\nGenerating proposal for location: {location}, problem domain: {problem_domain}")
|
| 422 |
-
|
| 423 |
-
|
| 424 |
-
|
| 425 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 426 |
proposals[(loc, prob)] = proposal
|
| 427 |
print("Generated Proposal: ", proposal)
|
|
|
|
| 428 |
else:
|
| 429 |
print(f"Skipping empty problem descriptions for location: {location}, problem domain: {problem_domain}")
|
| 430 |
|
|
@@ -509,9 +512,6 @@ def nlp_pipeline(original_df):
|
|
| 509 |
except Exception as e:
|
| 510 |
console_messages.append(f"Error in extract_location_clusters: {str(e)}")
|
| 511 |
console_messages.append("NLP pipeline for location extraction completed.")
|
| 512 |
-
|
| 513 |
-
|
| 514 |
-
|
| 515 |
|
| 516 |
|
| 517 |
# Create cluster dataframes
|
|
@@ -525,8 +525,8 @@ def nlp_pipeline(original_df):
|
|
| 525 |
# # Generate project proposals
|
| 526 |
# location_clusters = dict(enumerate(processed_df['Location_Category_Words'].unique()))
|
| 527 |
# problem_clusters = dict(enumerate(processed_df['Problem_Category_Words'].unique()))
|
| 528 |
-
print("\n location_clusters_2: ", location_clusters)
|
| 529 |
-
print("\n problem_clusters_2: ", problem_clusters)
|
| 530 |
project_proposals = create_project_proposals(budget_cluster_df, problem_cluster_df, location_clusters, problem_clusters)
|
| 531 |
|
| 532 |
console_messages.append("NLP pipeline completed.")
|
|
@@ -556,11 +556,11 @@ def process_excel(file):
|
|
| 556 |
|
| 557 |
output_filename = "OutPut_PPs.xlsx"
|
| 558 |
with pd.ExcelWriter(output_filename) as writer:
|
| 559 |
-
|
| 560 |
-
|
| 561 |
-
|
| 562 |
-
|
| 563 |
-
|
| 564 |
|
| 565 |
budget_cluster_df.to_excel(writer, sheet_name='Financial_Weights')
|
| 566 |
problem_cluster_df.to_excel(writer, sheet_name='Problem_Descriptions')
|
|
|
|
| 359 |
return budget_cluster_df, problem_cluster_df
|
| 360 |
|
| 361 |
from transformers import GPTNeoForCausalLM, GPT2Tokenizer
|
| 362 |
+
def generate_project_proposal(prompt):
|
| 363 |
print("Trying to access gpt-neo-1.3B")
|
| 364 |
+
print("prompt: \t", prompt)
|
| 365 |
+
try:
|
| 366 |
+
# Generate the proposal
|
| 367 |
+
model = GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt-neo-1.3B")
|
| 368 |
+
tokenizer = GPT2Tokenizer.from_pretrained("EleutherAI/gpt-neo-1.3B")
|
| 369 |
+
except Exception as e:
|
| 370 |
+
print("Error loading models:", str(e))
|
| 371 |
+
console_messages.append("\n Error Loading Models")
|
| 372 |
+
return prompt
|
| 373 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
| 374 |
try:
|
| 375 |
input_ids = tokenizer.encode(prompt, return_tensors="pt")
|
| 376 |
print("Input IDs shape:", input_ids.shape)
|
|
|
|
| 413 |
print("problem_descriptions: ",problem_descriptions)
|
| 414 |
|
| 415 |
if problem_descriptions and not pd.isna(problem_descriptions):
|
| 416 |
+
|
| 417 |
print(f"\nGenerating proposal for location: {location}, problem domain: {problem_domain}")
|
| 418 |
+
|
| 419 |
# console_messages.append(f"\nGenerating proposal for location: {location}, problem domain: {problem_domain}")
|
| 420 |
+
|
| 421 |
+
# Prepare the prompt
|
| 422 |
+
problems_summary = "; ".join(problem_descriptions[:3]) # Limit to first 3 for brevity
|
| 423 |
+
# problems_summary = "; ".join(problem_descriptions)
|
| 424 |
+
# prompt = f"Generate a project proposal for the following:\n\nLocation: {location}\nProblem Domain: {problem_domain}\nProblems: {problems_summary}\nBudget: ${financial_weight:.2f}\n\nProject Proposal:"
|
| 425 |
+
prompt = f"Generate a solution oriented project proposal for the following:\n\nLocation: {location}\nProblem Domain: {problem_domain}\nProblems: {problems_summary}\n\nProject Proposal:"
|
| 426 |
+
|
| 427 |
+
proposal = generate_project_proposal(prompt)
|
| 428 |
proposals[(loc, prob)] = proposal
|
| 429 |
print("Generated Proposal: ", proposal)
|
| 430 |
+
|
| 431 |
else:
|
| 432 |
print(f"Skipping empty problem descriptions for location: {location}, problem domain: {problem_domain}")
|
| 433 |
|
|
|
|
| 512 |
except Exception as e:
|
| 513 |
console_messages.append(f"Error in extract_location_clusters: {str(e)}")
|
| 514 |
console_messages.append("NLP pipeline for location extraction completed.")
|
|
|
|
|
|
|
|
|
|
| 515 |
|
| 516 |
|
| 517 |
# Create cluster dataframes
|
|
|
|
| 525 |
# # Generate project proposals
|
| 526 |
# location_clusters = dict(enumerate(processed_df['Location_Category_Words'].unique()))
|
| 527 |
# problem_clusters = dict(enumerate(processed_df['Problem_Category_Words'].unique()))
|
| 528 |
+
# print("\n location_clusters_2: ", location_clusters)
|
| 529 |
+
# print("\n problem_clusters_2: ", problem_clusters)
|
| 530 |
project_proposals = create_project_proposals(budget_cluster_df, problem_cluster_df, location_clusters, problem_clusters)
|
| 531 |
|
| 532 |
console_messages.append("NLP pipeline completed.")
|
|
|
|
| 556 |
|
| 557 |
output_filename = "OutPut_PPs.xlsx"
|
| 558 |
with pd.ExcelWriter(output_filename) as writer:
|
| 559 |
+
### Convert project_proposals dictionary to DataFrame
|
| 560 |
+
project_proposals_df = pd.DataFrame.from_dict(project_proposals, orient='index', columns=['Solutions Proposed'])
|
| 561 |
+
project_proposals_df.index.names = ['Location_Cluster', 'Problem_Cluster']
|
| 562 |
+
project_proposals_df.reset_index(inplace=True)
|
| 563 |
+
project_proposals_df.to_excel(writer, sheet_name='Project_Proposals', index=False)
|
| 564 |
|
| 565 |
budget_cluster_df.to_excel(writer, sheet_name='Financial_Weights')
|
| 566 |
problem_cluster_df.to_excel(writer, sheet_name='Problem_Descriptions')
|