Update process_data.py
Browse files- process_data.py +80 -276
process_data.py
CHANGED
|
@@ -87,36 +87,22 @@ client = OpenAI()
|
|
| 87 |
# 'prompt' = ['prompts': ["prompt1": "value", "prompt2": "value"]]
|
| 88 |
#}
|
| 89 |
|
| 90 |
-
|
| 91 |
-
def generate_json(input_data, parameters):
|
| 92 |
"""
|
| 93 |
Function to prompt OpenAI API to generate structured JSON output.
|
| 94 |
|
| 95 |
Args:
|
| 96 |
-
input_data: (dict) The input data, preprocessed, from the user. Aka what will fill in the JSON
|
| 97 |
-
input_data["input_text"] = the preprocessed input text
|
| 98 |
-
input_data["input_context"] = depending on levers, empty or what is put in front of the prompt
|
| 99 |
-
parameters: (dict) All of the individual parameters and "flippers"
|
| 100 |
-
parameters["model_version"] = (str) what model should be used
|
| 101 |
-
parameters["chaining"] = (bool) whether or not the preprocessed input context should be chained (given to multiple models)
|
| 102 |
-
parameters["context_pre_prompt"], parameters["summary_pre_prompt"], parameters["conversation_pre_prompt"], parameters["example_pre_prompt"] = (str) all of the pre-prompts, separated
|
| 103 |
-
parameters["combined_pre_prompt"] = (str) concatenated individual pre-prompts
|
| 104 |
|
| 105 |
Returns:
|
| 106 |
3 processed data-filled JSON objects: farm_pretty_json, interactions_pretty_json, trial_pretty_json
|
| 107 |
"""
|
| 108 |
print("Generating JSON Whole!")
|
| 109 |
-
input_text =
|
| 110 |
-
model_version = parameters["model_version"]
|
| 111 |
|
| 112 |
-
farm_prompt = "
|
| 113 |
-
interactions_prompt = "
|
| 114 |
-
trial_prompt = "
|
| 115 |
-
|
| 116 |
-
if parameters["combined_pre_prompt"]:
|
| 117 |
-
farm_prompt = parameters["combined_pre_prompt"] + farm_prompt
|
| 118 |
-
interactions_prompt = parameters["combined_pre_prompt"] + interactions_prompt
|
| 119 |
-
trial_prompt = parameters["combined_pre_prompt"] + trial_prompt
|
| 120 |
|
| 121 |
|
| 122 |
try:
|
|
@@ -178,8 +164,9 @@ def generate_json(input_data, parameters):
|
|
| 178 |
except Exception as e:
|
| 179 |
return {"error": "Failed to generate valid JSON. " + str(e)}
|
| 180 |
|
|
|
|
| 181 |
# This is for the step-wise JSON creation
|
| 182 |
-
def generate_json_pieces(
|
| 183 |
"""
|
| 184 |
This is primarily for one of the flippers, which allows each individual JSON section to be created individually, then concatenates them all together.
|
| 185 |
It is proposed that perhaps the individual calls to the model will be more robust than giving the model all the data at once.
|
|
@@ -204,18 +191,14 @@ def generate_json_pieces(input_data, parameters):
|
|
| 204 |
print("Generating JSON Pieces!")
|
| 205 |
|
| 206 |
print("INPUT DATA")
|
| 207 |
-
print(
|
| 208 |
-
print("PARAMS")
|
| 209 |
-
print(parameters)
|
| 210 |
|
| 211 |
-
|
| 212 |
-
model_version = parameters["model_version"]
|
| 213 |
|
| 214 |
-
print("
|
| 215 |
-
print(specification)
|
| 216 |
print(model_version)
|
| 217 |
|
| 218 |
-
if parameters["
|
| 219 |
print("Pre prompt is true")
|
| 220 |
field_data_input = input_data["input_text_pieces"]["pre_processed_pieces"]["field_data_input"]
|
| 221 |
planting_data_input = input_data["input_text_pieces"]["pre_processed_pieces"]["planting_data_input"]
|
|
@@ -245,31 +228,18 @@ def generate_json_pieces(input_data, parameters):
|
|
| 245 |
|
| 246 |
# Fix these prompts for all
|
| 247 |
print("Setting prompts")
|
| 248 |
-
field_prompt = "
|
| 249 |
-
plant_prompt = "
|
| 250 |
-
log_prompt = "
|
| 251 |
-
soil_prompt = "
|
| 252 |
-
yield_prompt = "
|
| 253 |
-
|
| 254 |
-
interaction_prompt = "
|
| 255 |
-
person_prompt = "
|
| 256 |
-
|
| 257 |
-
trial_prompt = "
|
| 258 |
-
treatment_prompt = "
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
if parameters["combined_pre_prompt"]:
|
| 262 |
-
field_prompt = parameters["combined_pre_prompt"] + field_prompt
|
| 263 |
-
plant_prompt = parameters["combined_pre_prompt"] + plant_prompt
|
| 264 |
-
log_prompt = parameters["combined_pre_prompt"] + log_prompt
|
| 265 |
-
soil_prompt = parameters["combined_pre_prompt"] + soil_prompt
|
| 266 |
-
yield_prompt = parameters["combined_pre_prompt"] + yield_prompt
|
| 267 |
-
|
| 268 |
-
interaction_prompt = parameters["combined_pre_prompt"] + interaction_prompt
|
| 269 |
-
person_prompt = parameters["combined_pre_prompt"] + person_prompt
|
| 270 |
|
| 271 |
-
trial_prompt = parameters["combined_pre_prompt"] + trial_prompt
|
| 272 |
-
treatment_prompt = parameters["combined_pre_prompt"] + treatment_prompt
|
| 273 |
|
| 274 |
try:
|
| 275 |
# Call OpenAI API to generate structured output based on prompt
|
|
@@ -496,22 +466,11 @@ def generate_json_pieces(input_data, parameters):
|
|
| 496 |
return {"error": "Failed to generate valid JSON. " + str(e)}
|
| 497 |
|
| 498 |
|
| 499 |
-
def pre_processing(
|
| 500 |
"""
|
| 501 |
In the event there's a pre-prompt, process the pre-prompts and input text accordingly
|
| 502 |
|
| 503 |
Args:
|
| 504 |
-
input_data: (dict) The input data, preprocessed, from the user. Aka what will fill in the JSON
|
| 505 |
-
input_data["input_text"] = (str) the preprocessed input text
|
| 506 |
-
input_data["input_context"] = (str) depending on levers, empty or what is put in front of the prompt
|
| 507 |
-
input_data["input_text_pieces"] = (dict) containing the individual split up prompt pieces: field_data_input, planting_data_input, log_data_input, soil_data_input, yield_data_input
|
| 508 |
-
|
| 509 |
-
parameters: (dict) All of the individual parameters and "flippers"
|
| 510 |
-
parameters["model_version"] = (str) what model should be used
|
| 511 |
-
parameters["chaining"] = (bool) whether or not the preprocessed input context should be chained (given to multiple models)
|
| 512 |
-
parameters["context_pre_prompt"], parameters["summary_pre_prompt"], parameters["conversation_pre_prompt"], parameters["example_pre_prompt"] = (str) all of the pre-prompts, separated
|
| 513 |
-
parameters["combined_pre_prompt"] = (str) concatenated individual pre-prompts
|
| 514 |
-
parameters["additional_json_pieces_options"] = (str) "Explicit specific pieces" or "Parse from one big input text" to indicate whether it's many function calls on one input text or many function calls on smaller pieces of input texts
|
| 515 |
|
| 516 |
Returns:
|
| 517 |
(dict) input_data
|
|
@@ -519,46 +478,49 @@ def pre_processing(input_data, parameters):
|
|
| 519 |
input_data["input_text"] = (str) input text
|
| 520 |
"""
|
| 521 |
print("Starting preprocessing")
|
| 522 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 523 |
print("Stepwise Creation")
|
| 524 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 525 |
|
| 526 |
-
|
| 527 |
-
print("Chaining")
|
| 528 |
-
for text_label, text_body in input_data["input_text_pieces"].items():
|
| 529 |
-
if 'data_input' in text_label:
|
| 530 |
-
for parameter_name, parameter_value in parameters.items():
|
| 531 |
-
if 'pre_prompt' in parameter_name and parameter_value and not isinstance(parameter_value, bool) and text_body:
|
| 532 |
-
print("Text Label")
|
| 533 |
-
print(text_label)
|
| 534 |
-
print("Prompt followed by data entered")
|
| 535 |
-
print(parameter_value)
|
| 536 |
-
print(text_body)
|
| 537 |
-
response = client.chat.completions.create(
|
| 538 |
-
model=parameters["model_version"],
|
| 539 |
-
messages=[
|
| 540 |
-
{"role": "system", "content": parameter_value},
|
| 541 |
-
{"role": "user", "content": text_body}
|
| 542 |
-
]
|
| 543 |
-
)
|
| 544 |
-
|
| 545 |
-
response_text = response.choices[0].message.content
|
| 546 |
-
print("Response text")
|
| 547 |
-
print(response_text)
|
| 548 |
-
|
| 549 |
-
input_data["input_text_pieces"]["pre_processed_pieces"][text_label] = response_text
|
| 550 |
-
|
| 551 |
-
return input_data
|
| 552 |
|
| 553 |
-
|
| 554 |
-
|
| 555 |
-
|
| 556 |
-
input_text = input_data["input_text"]
|
| 557 |
-
pre_processing_list = [parameters["context_pre_prompt"], parameters["summary_pre_prompt"], parameters["conversation_pre_prompt"], parameters["example_pre_prompt"]]
|
| 558 |
-
|
| 559 |
-
print("PreProcessingList")
|
| 560 |
-
print(pre_processing_list)
|
| 561 |
-
for pre_prompt in pre_processing_list:
|
| 562 |
try:
|
| 563 |
print("Pre-Processing: ")
|
| 564 |
if pre_prompt:
|
|
@@ -589,25 +551,13 @@ def pre_processing(input_data, parameters):
|
|
| 589 |
|
| 590 |
input_data["input_text"] = input_text
|
| 591 |
return input_data
|
| 592 |
-
|
| 593 |
|
| 594 |
-
def process_specifications(
|
| 595 |
"""
|
| 596 |
Once the parameters and data are processed, do the pre-processing and then generate JSONs
|
| 597 |
|
| 598 |
Args:
|
| 599 |
-
input_data: (dict) The input data, preprocessed, from the user. Aka what will fill in the JSON
|
| 600 |
-
input_data["input_text"] = (str) the preprocessed input text
|
| 601 |
-
input_data["input_context"] = (str) depending on levers, empty or what is put in front of the prompt
|
| 602 |
-
input_data["input_text_pieces"] = (dict) containing the individual split up prompt pieces: field_data_input, planting_data_input, log_data_input, soil_data_input, yield_data_input
|
| 603 |
-
|
| 604 |
-
parameters: (dict) All of the individual parameters and "flippers"
|
| 605 |
-
parameters["pre_prompt"] = (bool) whether or not there is a pre-prompt to process through pre_processing()
|
| 606 |
-
parameters["model_version"] = (str) what model should be used
|
| 607 |
-
parameters["chaining"] = (bool) whether or not the preprocessed input context should be chained (given to multiple models)
|
| 608 |
-
parameters["context_pre_prompt"], parameters["summary_pre_prompt"], parameters["conversation_pre_prompt"], parameters["example_pre_prompt"] = (str) all of the pre-prompts, separated
|
| 609 |
-
parameters["combined_pre_prompt"] = (str) concatenated individual pre-prompts
|
| 610 |
-
parameters["additional_json_pieces_options"] = (str) "Explicit specific pieces" or "Parse from one big input text" to indicate whether it's many function calls on one input text or many function calls on smaller pieces of input texts
|
| 611 |
|
| 612 |
Returns:
|
| 613 |
3 processed data-filled JSON objects: farm_pretty_json, interactions_pretty_json, trial_pretty_json
|
|
@@ -619,172 +569,26 @@ def process_specifications(input_data, parameters):
|
|
| 619 |
print(parameters)
|
| 620 |
|
| 621 |
# here is where parsing and other things will happen before
|
| 622 |
-
|
| 623 |
-
|
| 624 |
-
|
|
|
|
| 625 |
print("You are continuing with pre_prompt processing")
|
| 626 |
-
processed_input = pre_processing(
|
| 627 |
else:
|
| 628 |
print("You have elsed into no pre-processing")
|
| 629 |
-
processed_input =
|
| 630 |
-
return generate_json_pieces(processed_input
|
| 631 |
-
elif
|
| 632 |
print("You are elifing into single json creation")
|
| 633 |
-
|
| 634 |
-
if parameters["pre_prompt"] == True:
|
| 635 |
print("You are preprocessing now")
|
| 636 |
-
processed_input = pre_processing(input_data
|
| 637 |
-
else:
|
| 638 |
print("You do not have any preprocessing now")
|
| 639 |
processed_input = input_data
|
| 640 |
-
return generate_json(processed_input
|
| 641 |
-
|
| 642 |
-
|
| 643 |
-
def parse_survey_stack_parameters(data):
|
| 644 |
-
"""
|
| 645 |
-
Parse the incoming parameters from the parameter survey
|
| 646 |
-
|
| 647 |
-
Args:
|
| 648 |
-
data: (json) JSON retrieved from surveystack API after retrieving survey info/details
|
| 649 |
-
|
| 650 |
-
Returns:
|
| 651 |
-
processed_data (dict)
|
| 652 |
-
processed_data["pre_prompt"] = (bool) whether or not there is a pre-prompt to process through pre_processing()
|
| 653 |
-
processed_data["model_version"] = (str) what model should be used
|
| 654 |
-
processed_data["chaining"] = (bool) whether or not the preprocessed input context should be chained (given to multiple models)
|
| 655 |
-
processed_data["context_pre_prompt"], parameters["summary_pre_prompt"], parameters["conversation_pre_prompt"], parameters["example_pre_prompt"] = (str) all of the pre-prompts, separated
|
| 656 |
-
processed_data["combined_pre_prompt"] = (str) concatenated individual pre-prompts
|
| 657 |
-
processed_data["additional_json_pieces_options"] = (str) "Explicit specific pieces" or "Parse from one big input text" to indicate whether it's many function calls on one input text or many function calls on smaller pieces of input texts
|
| 658 |
-
"""
|
| 659 |
-
processed_data = {}
|
| 660 |
-
|
| 661 |
-
processed_data["model_version"] = data[0]['data']['modelversion']['value'][0]
|
| 662 |
-
|
| 663 |
-
print("DATA: ")
|
| 664 |
-
print(data)
|
| 665 |
|
| 666 |
-
try:
|
| 667 |
-
|
| 668 |
-
print("Extracting parameters")
|
| 669 |
-
|
| 670 |
-
pre_promp_parameters = data[0]['data']['group_2']
|
| 671 |
-
|
| 672 |
-
if pre_promp_parameters['preprompt']['value'][0] == 'continue_preprompts':
|
| 673 |
-
processed_data["pre_prompt"] = True
|
| 674 |
-
|
| 675 |
-
# Accessing context and other prompts, with defaults in case they are None
|
| 676 |
-
processed_data["context_pre_prompt"] = pre_promp_parameters.get('contextpreprompt', {}).get('value', None)
|
| 677 |
-
processed_data["summary_pre_prompt"] = pre_promp_parameters.get('summarypreprompt', {}).get('value', None)
|
| 678 |
-
processed_data["conversation_pre_prompt"] = pre_promp_parameters.get('conversationpreprompt', {}).get('value', None)
|
| 679 |
-
processed_data["example_pre_prompt"] = pre_promp_parameters.get('examplepreprompt', {}).get('value', None)
|
| 680 |
-
|
| 681 |
-
# Check if chaining is set to "yes" or "no"
|
| 682 |
-
chaining_value = pre_promp_parameters.get('prepromptchaining', {}).get('value', [None])[0]
|
| 683 |
-
|
| 684 |
-
if chaining_value == "no":
|
| 685 |
-
# Combine prompts if chaining is "no"
|
| 686 |
-
combined_prompt = " ".join(
|
| 687 |
-
filter(None, [
|
| 688 |
-
processed_data["context_pre_prompt"],
|
| 689 |
-
processed_data["summary_pre_prompt"],
|
| 690 |
-
processed_data["conversation_pre_prompt"],
|
| 691 |
-
processed_data["example_pre_prompt"]
|
| 692 |
-
])
|
| 693 |
-
)
|
| 694 |
-
processed_data["chaining"] = False
|
| 695 |
-
processed_data["combined_pre_prompt"] = combined_prompt
|
| 696 |
-
else:
|
| 697 |
-
# Set combined_pre_prompt to None if chaining is enabled
|
| 698 |
-
processed_data["chaining"] = True
|
| 699 |
-
processed_data["combined_pre_prompt"] = None
|
| 700 |
-
else:
|
| 701 |
-
# Set fields to None if preprompt is not "continue_preprompts"
|
| 702 |
-
processed_data["pre_prompt"] = False
|
| 703 |
-
processed_data["context_pre_prompt"] = None
|
| 704 |
-
processed_data["summary_pre_prompt"] = None
|
| 705 |
-
processed_data["conversation_pre_prompt"] = None
|
| 706 |
-
processed_data["example_pre_prompt"] = None
|
| 707 |
-
processed_data["chaining"] = False
|
| 708 |
-
processed_data["combined_pre_prompt"] = None
|
| 709 |
-
|
| 710 |
-
except Exception as e:
|
| 711 |
-
print(f"An error occurred: {e}")
|
| 712 |
-
|
| 713 |
-
print("Done Extracting parameters:")
|
| 714 |
-
print(str(processed_data))
|
| 715 |
-
return processed_data
|
| 716 |
-
|
| 717 |
-
def parse_survey_stack_data(data):
|
| 718 |
-
"""
|
| 719 |
-
Parse the incoming data from the survey stack survey
|
| 720 |
-
|
| 721 |
-
Args:
|
| 722 |
-
data: (json) JSON retrieved from surveystack API after retrieving survey info/details
|
| 723 |
-
|
| 724 |
-
Returns:
|
| 725 |
-
processed_data
|
| 726 |
-
processed_data["input_text"] = (str) the raw input text
|
| 727 |
-
"""
|
| 728 |
-
print("PROCESSING SURVEY STACK DATA")
|
| 729 |
-
processed_data = {}
|
| 730 |
-
|
| 731 |
-
print("JUST PRINTING OUT THE DATA FOR YA")
|
| 732 |
-
|
| 733 |
-
print(data)
|
| 734 |
-
|
| 735 |
-
|
| 736 |
-
processed_data["stepwise_json_creation"] = data[0]['data']['stepwisejsoncreation']['value']
|
| 737 |
-
print("STEPWISE?: " + str(processed_data["stepwise_json_creation"]))
|
| 738 |
-
|
| 739 |
-
if processed_data["stepwise_json_creation"][0] == "stepwisejsoncreation":
|
| 740 |
-
print("IN THE STEP")
|
| 741 |
-
farm_management_inputs = data[0]['data']['group_4']
|
| 742 |
-
print("FARM MANAGEMENT INPUTS" + str(farm_management_inputs))
|
| 743 |
-
|
| 744 |
-
processed_data["input_text_pieces"] = {}
|
| 745 |
-
processed_data["input_text_pieces"]["field_data_input"] = farm_management_inputs.get('field_data_input', {}).get('value', None)
|
| 746 |
-
processed_data["input_text_pieces"]["planting_data_input"] = farm_management_inputs.get('planting_data_input', {}).get('value', None)
|
| 747 |
-
processed_data["input_text_pieces"]["log_data_input"] = farm_management_inputs.get('log_data_input', {}).get('value', None)
|
| 748 |
-
processed_data["input_text_pieces"]["soil_data_input"] = farm_management_inputs.get('soil_data_input', {}).get('value', None)
|
| 749 |
-
processed_data["input_text_pieces"]["yield_data_input"] = farm_management_inputs.get('yield_data_input', {}).get('value', None)
|
| 750 |
-
processed_data["input_text"] = "EMPTY"
|
| 751 |
-
|
| 752 |
-
print("NEXT SCHEMA INPUTS")
|
| 753 |
-
interactions_inputs = data[0]['data']['group_5']
|
| 754 |
-
print("INTERACTIONS INPUTS" + str(interactions_inputs))
|
| 755 |
-
processed_data["input_text_pieces"]["interaction_data_input"] = interactions_inputs.get('interaction_data_input', {}).get('value', None)
|
| 756 |
-
processed_data["input_text_pieces"]["person_data_input"] = interactions_inputs.get('person_data_input', {}).get('value', None)
|
| 757 |
-
|
| 758 |
-
print("NEXT SCHEMA INPUTS 2")
|
| 759 |
-
trials_inputs = data[0]['data']['group_6']
|
| 760 |
-
print("TRIALS INPUTS" + str(trials_inputs))
|
| 761 |
-
processed_data["input_text_pieces"]["trial_data_input"] = trials_inputs.get('trial_data_input', {}).get('value', None)
|
| 762 |
-
processed_data["input_text_pieces"]["treatment_data_input"] = trials_inputs.get('treatment_data_input', {}).get('value', None)
|
| 763 |
-
|
| 764 |
-
|
| 765 |
-
elif processed_data["stepwise_json_creation"][0] == "singlejsoncreation":
|
| 766 |
-
print("IN THE SINGLE")
|
| 767 |
-
processed_data["input_text"] = data[0]['data']['onelonginputtext']['value']
|
| 768 |
-
print(processed_data["input_text"])
|
| 769 |
-
|
| 770 |
-
processed_data["input_text_pieces"] = {}
|
| 771 |
-
processed_data["input_text_pieces"]["field_data_input"] = "EMPTY"
|
| 772 |
-
processed_data["input_text_pieces"]["planting_data_input"] = "EMPTY"
|
| 773 |
-
processed_data["input_text_pieces"]["log_data_input"] = "EMPTY"
|
| 774 |
-
processed_data["input_text_pieces"]["soil_data_input"] = "EMPTY"
|
| 775 |
-
processed_data["input_text_pieces"]["yield_data_input"] = "EMPTY"
|
| 776 |
-
|
| 777 |
-
processed_data["input_text_pieces"]["interaction_data_input"] = "EMPTY"
|
| 778 |
-
processed_data["input_text_pieces"]["person_data_input"] = "EMPTY"
|
| 779 |
-
|
| 780 |
-
processed_data["input_text_pieces"]["trial_data_input"] = "EMPTY"
|
| 781 |
-
processed_data["input_text_pieces"]["treatment_data_input"] = "EMPTY"
|
| 782 |
-
|
| 783 |
-
print("RETURNING DATA")
|
| 784 |
-
print(processed_data)
|
| 785 |
-
|
| 786 |
-
return processed_data
|
| 787 |
-
|
| 788 |
def parse_survey_stack(data):
|
| 789 |
"""
|
| 790 |
Parse the incoming data from the survey stack survey
|
|
@@ -847,7 +651,7 @@ def parse_survey_stack(data):
|
|
| 847 |
processed_data["prompts"]["second_schema_prompt_two"] = parameter_inputs.get('secondschemaprompt2', {}).get('value', None)
|
| 848 |
processed_data["prompts"]["third_schema_prompt_one"] = parameter_inputs.get('thirdschemaprompt1', {}).get('value', None)
|
| 849 |
processed_data["prompts"]["third_schema_prompt_two"] = parameter_inputs.get('thirdschemaprompt2', {}).get('value', None)
|
| 850 |
-
|
| 851 |
elif processed_data["inputstyle"] == "big-block-input-text":
|
| 852 |
print("IN THE SINGLE")
|
| 853 |
processed_data["input_text"] = data[0]['data']['onelonginputtext']['value']
|
|
@@ -867,7 +671,7 @@ def parse_survey_stack(data):
|
|
| 867 |
|
| 868 |
if processed_data["parameters"]["pre_process"] == "yes":
|
| 869 |
processed_data["parameters"]["multiple_pre_prompts"] = data[0]['data']['multiplepreprompts']['value'] # this is a value in string words
|
| 870 |
-
processed_data["parameters"]["
|
| 871 |
processed_data["parameters"]["pre_prompt_first"] = data[0]['data']['preprocessingprompt1']['value']
|
| 872 |
processed_data["parameters"]["pre_prompt_second"] = data[0]['data']['preprocessingprompt2'].get('value', None)
|
| 873 |
processed_data["parameters"]["pre_prompt_third"] = data[0]['data']['preprocessingprompt3'].get('value', None)
|
|
|
|
| 87 |
# 'prompt' = ['prompts': ["prompt1": "value", "prompt2": "value"]]
|
| 88 |
#}
|
| 89 |
|
| 90 |
+
def generate_json(processed_data):
|
|
|
|
| 91 |
"""
|
| 92 |
Function to prompt OpenAI API to generate structured JSON output.
|
| 93 |
|
| 94 |
Args:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 95 |
|
| 96 |
Returns:
|
| 97 |
3 processed data-filled JSON objects: farm_pretty_json, interactions_pretty_json, trial_pretty_json
|
| 98 |
"""
|
| 99 |
print("Generating JSON Whole!")
|
| 100 |
+
input_text = processed_data["input_text"]
|
| 101 |
+
model_version = processed_data["parameters"]["model_version"]
|
| 102 |
|
| 103 |
+
farm_prompt = processed_data["prompts"]["first_schema_prompt"]
|
| 104 |
+
interactions_prompt = processed_data["prompts"]["second_schema_prompt"]
|
| 105 |
+
trial_prompt = processed_data["prompts"]["third_schema_prompt"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 106 |
|
| 107 |
|
| 108 |
try:
|
|
|
|
| 164 |
except Exception as e:
|
| 165 |
return {"error": "Failed to generate valid JSON. " + str(e)}
|
| 166 |
|
| 167 |
+
|
| 168 |
# This is for the step-wise JSON creation
|
| 169 |
+
def generate_json_pieces(processed_data):
|
| 170 |
"""
|
| 171 |
This is primarily for one of the flippers, which allows each individual JSON section to be created individually, then concatenates them all together.
|
| 172 |
It is proposed that perhaps the individual calls to the model will be more robust than giving the model all the data at once.
|
|
|
|
| 191 |
print("Generating JSON Pieces!")
|
| 192 |
|
| 193 |
print("INPUT DATA")
|
| 194 |
+
print(processed_data)
|
|
|
|
|
|
|
| 195 |
|
| 196 |
+
model_version = processed_data["parameters"]["model_version"]
|
|
|
|
| 197 |
|
| 198 |
+
print("Model Version")
|
|
|
|
| 199 |
print(model_version)
|
| 200 |
|
| 201 |
+
if processed_data["parameters"]["pre_process"] == "yes":
|
| 202 |
print("Pre prompt is true")
|
| 203 |
field_data_input = input_data["input_text_pieces"]["pre_processed_pieces"]["field_data_input"]
|
| 204 |
planting_data_input = input_data["input_text_pieces"]["pre_processed_pieces"]["planting_data_input"]
|
|
|
|
| 228 |
|
| 229 |
# Fix these prompts for all
|
| 230 |
print("Setting prompts")
|
| 231 |
+
field_prompt = processed_data["prompts"]["first_schema_prompt_one"]
|
| 232 |
+
plant_prompt = processed_data["prompts"]["first_schema_prompt_two"]
|
| 233 |
+
log_prompt = processed_data["prompts"]["first_schema_prompt_three"]
|
| 234 |
+
soil_prompt = processed_data["prompts"]["first_schema_prompt_four"]
|
| 235 |
+
yield_prompt = processed_data["prompts"]["first_schema_prompt_five"]
|
| 236 |
+
|
| 237 |
+
interaction_prompt = processed_data["prompts"]["second_schema_prompt_one"]
|
| 238 |
+
person_prompt = processed_data["prompts"]["second_schema_prompt_two"]
|
| 239 |
+
|
| 240 |
+
trial_prompt = processed_data["prompts"]["third_schema_prompt_one"]
|
| 241 |
+
treatment_prompt = processed_data["prompts"]["third_schema_prompt_two"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 242 |
|
|
|
|
|
|
|
| 243 |
|
| 244 |
try:
|
| 245 |
# Call OpenAI API to generate structured output based on prompt
|
|
|
|
| 466 |
return {"error": "Failed to generate valid JSON. " + str(e)}
|
| 467 |
|
| 468 |
|
| 469 |
+
def pre_processing(processed_data):
|
| 470 |
"""
|
| 471 |
In the event there's a pre-prompt, process the pre-prompts and input text accordingly
|
| 472 |
|
| 473 |
Args:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 474 |
|
| 475 |
Returns:
|
| 476 |
(dict) input_data
|
|
|
|
| 478 |
input_data["input_text"] = (str) input text
|
| 479 |
"""
|
| 480 |
print("Starting preprocessing")
|
| 481 |
+
|
| 482 |
+
|
| 483 |
+
pre_processing_list = [processed_data.get('parameters', {}).get('pre_prompt_first', None), processed_data.get('parameters', {}).get('pre_prompt_second', None), processed_data.get('parameters', {}).get('pre_prompt_third', None)]
|
| 484 |
+
|
| 485 |
+
print("Preprocessing list")
|
| 486 |
+
print(pre_processing_list)
|
| 487 |
+
|
| 488 |
+
if processed_data["inputstyle"] == "individual-pieces-input-text":
|
| 489 |
print("Stepwise Creation")
|
| 490 |
+
processed_data["input_text_pieces"]["pre_processed_pieces"] = {}
|
| 491 |
+
|
| 492 |
+
for text_label, text_body in processed_data["input_text_pieces"].items():
|
| 493 |
+
if 'data_input' in text_label:
|
| 494 |
+
processed_data["parameters"]["pre_prompt_first"]
|
| 495 |
+
# change this to pre-processing list
|
| 496 |
+
for pre_prompt in pre_processing_list:
|
| 497 |
+
if pre_process_task:
|
| 498 |
+
print("Text Label")
|
| 499 |
+
print(text_label)
|
| 500 |
+
print("Prompt followed by data entered")
|
| 501 |
+
print(parameter_value)
|
| 502 |
+
print(text_body)
|
| 503 |
+
response = client.chat.completions.create(
|
| 504 |
+
model=processed_data["parameters"]["model_version"],
|
| 505 |
+
messages=[
|
| 506 |
+
{"role": "system", "content": pre_process_task},
|
| 507 |
+
{"role": "user", "content": text_body}
|
| 508 |
+
]
|
| 509 |
+
)
|
| 510 |
+
|
| 511 |
+
response_text = response.choices[0].message.content
|
| 512 |
+
print("Response text")
|
| 513 |
+
print(response_text)
|
| 514 |
+
|
| 515 |
+
input_data["input_text_pieces"]["pre_processed_pieces"][text_label] = response_text
|
| 516 |
+
|
| 517 |
+
return input_data
|
| 518 |
|
| 519 |
+
if processed_data["inputstyle"] == "big-block-input-text":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 520 |
|
| 521 |
+
input_text = input_data["input_text"]
|
| 522 |
+
|
| 523 |
+
for pre_prompt in pre_processing_list:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 524 |
try:
|
| 525 |
print("Pre-Processing: ")
|
| 526 |
if pre_prompt:
|
|
|
|
| 551 |
|
| 552 |
input_data["input_text"] = input_text
|
| 553 |
return input_data
|
| 554 |
+
|
| 555 |
|
| 556 |
+
def process_specifications(processed_data):
|
| 557 |
"""
|
| 558 |
Once the parameters and data are processed, do the pre-processing and then generate JSONs
|
| 559 |
|
| 560 |
Args:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 561 |
|
| 562 |
Returns:
|
| 563 |
3 processed data-filled JSON objects: farm_pretty_json, interactions_pretty_json, trial_pretty_json
|
|
|
|
| 569 |
print(parameters)
|
| 570 |
|
| 571 |
# here is where parsing and other things will happen before
|
| 572 |
+
|
| 573 |
+
if processed_data["inputstyle"] == "individual-pieces-input-text":
|
| 574 |
+
print("You are continuing with step-wise creation with your individual text pieces")
|
| 575 |
+
if processed_data["parameters"]["pre_process"] == "yes":
|
| 576 |
print("You are continuing with pre_prompt processing")
|
| 577 |
+
processed_input = pre_processing(processed_data)
|
| 578 |
else:
|
| 579 |
print("You have elsed into no pre-processing")
|
| 580 |
+
processed_input = processed_data
|
| 581 |
+
return generate_json_pieces(processed_input)
|
| 582 |
+
elif processed_data["inputstyle"] == "big-block-input-text":
|
| 583 |
print("You are elifing into single json creation")
|
| 584 |
+
if processed_data["parameters"]["pre_process"] == "yes":
|
|
|
|
| 585 |
print("You are preprocessing now")
|
| 586 |
+
processed_input = pre_processing(input_data)
|
| 587 |
+
else:
|
| 588 |
print("You do not have any preprocessing now")
|
| 589 |
processed_input = input_data
|
| 590 |
+
return generate_json(processed_input)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 591 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 592 |
def parse_survey_stack(data):
|
| 593 |
"""
|
| 594 |
Parse the incoming data from the survey stack survey
|
|
|
|
| 651 |
processed_data["prompts"]["second_schema_prompt_two"] = parameter_inputs.get('secondschemaprompt2', {}).get('value', None)
|
| 652 |
processed_data["prompts"]["third_schema_prompt_one"] = parameter_inputs.get('thirdschemaprompt1', {}).get('value', None)
|
| 653 |
processed_data["prompts"]["third_schema_prompt_two"] = parameter_inputs.get('thirdschemaprompt2', {}).get('value', None)
|
| 654 |
+
|
| 655 |
elif processed_data["inputstyle"] == "big-block-input-text":
|
| 656 |
print("IN THE SINGLE")
|
| 657 |
processed_data["input_text"] = data[0]['data']['onelonginputtext']['value']
|
|
|
|
| 671 |
|
| 672 |
if processed_data["parameters"]["pre_process"] == "yes":
|
| 673 |
processed_data["parameters"]["multiple_pre_prompts"] = data[0]['data']['multiplepreprompts']['value'] # this is a value in string words
|
| 674 |
+
processed_data["parameters"]["preprompt_style"] = data[0]['data']['prepromptstyle']['value'] # this is a list
|
| 675 |
processed_data["parameters"]["pre_prompt_first"] = data[0]['data']['preprocessingprompt1']['value']
|
| 676 |
processed_data["parameters"]["pre_prompt_second"] = data[0]['data']['preprocessingprompt2'].get('value', None)
|
| 677 |
processed_data["parameters"]["pre_prompt_third"] = data[0]['data']['preprocessingprompt3'].get('value', None)
|