Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -194,7 +194,7 @@ from collections import Counter
|
|
| 194 |
|
| 195 |
def extract_problem_domains(df,
|
| 196 |
text_column='Processed_ProblemDescription_forDomainExtraction',
|
| 197 |
-
cluster_range=(
|
| 198 |
top_words=10):
|
| 199 |
consoleMessage_and_Print("Extracting Problem Domains...")
|
| 200 |
|
|
@@ -276,7 +276,7 @@ def text_processing_for_location(text):
|
|
| 276 |
def extract_location_clusters(df,
|
| 277 |
text_column1='Processed_LocationText_forClustering', # Extracted through NLP
|
| 278 |
text_column2='Geographical_Location', # User Input
|
| 279 |
-
cluster_range=(
|
| 280 |
top_words=10):
|
| 281 |
# Combine the two text columns
|
| 282 |
text_column = "Combined_Location_Text"
|
|
@@ -371,7 +371,7 @@ def generate_project_proposal(prompt): # Generate the proposal
|
|
| 371 |
|
| 372 |
model = GPTNeoForCausalLM.from_pretrained(model_Name)
|
| 373 |
tokenizer = GPT2Tokenizer.from_pretrained(model_Name)
|
| 374 |
-
model_max_token_limit = 2048 #1500
|
| 375 |
|
| 376 |
try:
|
| 377 |
# input_ids = tokenizer.encode(prompt, return_tensors="pt")
|
|
@@ -457,13 +457,17 @@ def create_project_proposals(budget_cluster_df, problem_cluster_df, location_clu
|
|
| 457 |
|
| 458 |
random.shuffle(shuffled_descriptions)
|
| 459 |
# Prepare the prompt
|
| 460 |
-
# problems_summary = "; \n".join(shuffled_descriptions[:
|
| 461 |
-
problems_summary = "; \n".join(
|
|
|
|
|
|
|
| 462 |
|
| 463 |
|
| 464 |
# prompt = f"Generate a solution oriented project proposal for the following:\n\nLocation: {location}\nProblem Domain: {problem_domain}\nProblems: {problems_summary}\n\nProject Proposal:"
|
| 465 |
# prompt = f"Generate a solution-oriented project proposal for the following public problem (only output the proposal):\n\n Geographical/Digital Location: {location}\nProblem Category: {problem_domain}\nProblems: {problems_summary}\n\nProject Proposal:"
|
| 466 |
-
prompt = f"Generate a singular solution-oriented project proposal bespoke to the following Location~Domain cluster of public problems:\n\n Geographical/Digital Location: {location}\nProblem Domain: {problem_domain}\nProblems: {problems_summary}\n\nProject Proposal: \t"
|
|
|
|
|
|
|
| 467 |
|
| 468 |
proposal = generate_project_proposal(prompt)
|
| 469 |
# Check if proposal is valid
|
|
@@ -766,9 +770,9 @@ def process_excel(file):
|
|
| 766 |
|
| 767 |
|
| 768 |
example_files = []
|
| 769 |
-
|
| 770 |
-
|
| 771 |
-
example_files.append('#TaxDirection (Responses)_UltimateExample.xlsx')
|
| 772 |
|
| 773 |
|
| 774 |
import random
|
|
@@ -785,7 +789,7 @@ interface = gr.Interface(
|
|
| 785 |
|
| 786 |
outputs=[
|
| 787 |
gr.File(label="Download the processed Excel File containing the ** Project Proposals ** for each Location~Problem paired combination"), # File download output
|
| 788 |
-
gr.Textbox(label="Console Messages", lines=
|
| 789 |
],
|
| 790 |
|
| 791 |
|
|
|
|
| 194 |
|
| 195 |
def extract_problem_domains(df,
|
| 196 |
text_column='Processed_ProblemDescription_forDomainExtraction',
|
| 197 |
+
cluster_range=(2, 10),
|
| 198 |
top_words=10):
|
| 199 |
consoleMessage_and_Print("Extracting Problem Domains...")
|
| 200 |
|
|
|
|
| 276 |
def extract_location_clusters(df,
|
| 277 |
text_column1='Processed_LocationText_forClustering', # Extracted through NLP
|
| 278 |
text_column2='Geographical_Location', # User Input
|
| 279 |
+
cluster_range=(2, 10),
|
| 280 |
top_words=10):
|
| 281 |
# Combine the two text columns
|
| 282 |
text_column = "Combined_Location_Text"
|
|
|
|
| 371 |
|
| 372 |
model = GPTNeoForCausalLM.from_pretrained(model_Name)
|
| 373 |
tokenizer = GPT2Tokenizer.from_pretrained(model_Name)
|
| 374 |
+
model_max_token_limit = 1750 #2048 #1500
|
| 375 |
|
| 376 |
try:
|
| 377 |
# input_ids = tokenizer.encode(prompt, return_tensors="pt")
|
|
|
|
| 457 |
|
| 458 |
random.shuffle(shuffled_descriptions)
|
| 459 |
# Prepare the prompt
|
| 460 |
+
# problems_summary = "; \n".join(shuffled_descriptions[:3]) # Limit to first 3 for brevity
|
| 461 |
+
# problems_summary = "; \n".join([f"Problem: {desc}" for desc in shuffled_descriptions[:5]])
|
| 462 |
+
problems_summary = "; \n".join([f"Problem {i+1}: {desc}" for i, desc in enumerate(shuffled_descriptions[:7])])
|
| 463 |
+
# problems_summary = "; \n".join(shuffled_descriptions) # Join all problem descriptions
|
| 464 |
|
| 465 |
|
| 466 |
# prompt = f"Generate a solution oriented project proposal for the following:\n\nLocation: {location}\nProblem Domain: {problem_domain}\nProblems: {problems_summary}\n\nProject Proposal:"
|
| 467 |
# prompt = f"Generate a solution-oriented project proposal for the following public problem (only output the proposal):\n\n Geographical/Digital Location: {location}\nProblem Category: {problem_domain}\nProblems: {problems_summary}\n\nProject Proposal:"
|
| 468 |
+
# prompt = f"Generate a singular solution-oriented project proposal bespoke to the following Location~Domain cluster of public problems:\n\n Geographical/Digital Location: {location}\nProblem Domain: {problem_domain}\nProblems: {problems_summary}\n\nProject Proposal: \t"
|
| 469 |
+
prompt = f"Generate a singular solution-oriented project proposal bespoke to the following Location~Domain cluster of public problems:\n\n Geographical/Digital Location: {location}\nProblem Domain: {problem_domain}\n\n {problems_summary}\n\nSingle Combined Project Proposal: \t"
|
| 470 |
+
|
| 471 |
|
| 472 |
proposal = generate_project_proposal(prompt)
|
| 473 |
# Check if proposal is valid
|
|
|
|
| 770 |
|
| 771 |
|
| 772 |
example_files = []
|
| 773 |
+
example_files.append('#TaxDirection (Responses)_BasicExample.xlsx')
|
| 774 |
+
example_files.append('#TaxDirection (Responses)_IntermediateExample.xlsx')
|
| 775 |
+
# example_files.append('#TaxDirection (Responses)_UltimateExample.xlsx')
|
| 776 |
|
| 777 |
|
| 778 |
import random
|
|
|
|
| 789 |
|
| 790 |
outputs=[
|
| 791 |
gr.File(label="Download the processed Excel File containing the ** Project Proposals ** for each Location~Problem paired combination"), # File download output
|
| 792 |
+
gr.Textbox(label="Console Messages", lines=7, interactive=False) # Console messages output
|
| 793 |
],
|
| 794 |
|
| 795 |
|