Update process_data.py
Browse files- process_data.py +114 -6
process_data.py
CHANGED
|
@@ -9,8 +9,10 @@ import json
|
|
| 9 |
|
| 10 |
from schema_classes import FarmActivities, Interactions, Trial, FarmActivitiesLite, PlantingLite, Log, Soil, Yield
|
| 11 |
|
| 12 |
-
|
| 13 |
-
#
|
|
|
|
|
|
|
| 14 |
os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")
|
| 15 |
client = OpenAI()
|
| 16 |
|
|
@@ -18,6 +20,19 @@ client = OpenAI()
|
|
| 18 |
def generate_json(input_data, parameters):
|
| 19 |
"""
|
| 20 |
Function to prompt OpenAI API to generate structured JSON output.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
"""
|
| 22 |
|
| 23 |
input_text = input_data["input_text"]
|
|
@@ -33,7 +48,6 @@ def generate_json(input_data, parameters):
|
|
| 33 |
trial_prompt = input_data["input_context"] + trial_prompt
|
| 34 |
|
| 35 |
|
| 36 |
-
|
| 37 |
try:
|
| 38 |
#Call OpenAI API to generate structured output based on prompt
|
| 39 |
|
|
@@ -94,7 +108,36 @@ def generate_json(input_data, parameters):
|
|
| 94 |
return {"error": "Failed to generate valid JSON. " + str(e)}
|
| 95 |
|
| 96 |
# This is for the step-wise JSON creation
|
| 97 |
-
def generate_json_pieces(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 98 |
|
| 99 |
if additional_json_creation_options == "Explicit specific pieces":
|
| 100 |
field_data_specification = field_data_input
|
|
@@ -188,7 +231,27 @@ def generate_json_pieces(specification, model_version, additional_json_creation_
|
|
| 188 |
# return output1, output2, output3
|
| 189 |
|
| 190 |
def pre_processing(input_data, parameters):
|
| 191 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 192 |
|
| 193 |
if parameters["chaining"]:
|
| 194 |
|
|
@@ -238,6 +301,26 @@ def pre_processing(input_data, parameters):
|
|
| 238 |
|
| 239 |
|
| 240 |
def process_specifications(input_data, parameters):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 241 |
# here is where parsing and other things will happen before
|
| 242 |
if parameters["pre_prompt"] == True:
|
| 243 |
processed_input = pre_processing(input_data, parameters)
|
|
@@ -249,6 +332,21 @@ def process_specifications(input_data, parameters):
|
|
| 249 |
|
| 250 |
|
| 251 |
def parse_survey_stack_parameters(data):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 252 |
processed_data = {}
|
| 253 |
|
| 254 |
processed_data["model_version"] = data[0]['data']['modelversion']['value'][0]
|
|
@@ -283,7 +381,7 @@ def parse_survey_stack_parameters(data):
|
|
| 283 |
])
|
| 284 |
)
|
| 285 |
processed_data["chaining"] = False
|
| 286 |
-
processed_data["
|
| 287 |
else:
|
| 288 |
# Set combined_pre_prompt to None if chaining is enabled
|
| 289 |
processed_data["chaining"] = True
|
|
@@ -304,6 +402,16 @@ def parse_survey_stack_parameters(data):
|
|
| 304 |
return processed_data
|
| 305 |
|
| 306 |
def parse_survey_stack_data(data):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 307 |
processed_data = {}
|
| 308 |
|
| 309 |
processed_data["input_text"] = data[0]['data']['inputtext']['value']
|
|
|
|
| 9 |
|
| 10 |
from schema_classes import FarmActivities, Interactions, Trial, FarmActivitiesLite, PlantingLite, Log, Soil, Yield
|
| 11 |
|
| 12 |
+
|
| 13 |
+
# The API key must be stored as a "secret" in your environment. It is generated on the website of OpenAI (or of whichever company provides the model you wish to use).
|
| 14 |
+
# To use other models, some of the endpoints would need to change slightly.
|
| 15 |
+
# As is, the endpoint used requires a model that is capable of OpenAI's structured outputs.
|
| 16 |
os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")
|
| 17 |
client = OpenAI()
|
| 18 |
|
|
|
|
| 20 |
def generate_json(input_data, parameters):
|
| 21 |
"""
|
| 22 |
Function to prompt OpenAI API to generate structured JSON output.
|
| 23 |
+
|
| 24 |
+
Args:
|
| 25 |
+
input_data: (dict) The input data, preprocessed, from the user. Aka what will fill in the JSON
|
| 26 |
+
input_data["input_text"] = the preprocessed input text
|
| 27 |
+
input_data["input_context"] = depending on levers, empty or what is put in front of the prompt
|
| 28 |
+
parameters: (dict) All of the individual parameters and "flippers"
|
| 29 |
+
parameters["model_version"] = (str) what model should be used
|
| 30 |
+
parameters["chaining"] = (bool) whether or not the preprocessed input context should be chained (given to multiple models)
|
| 31 |
+
parameters["context_pre_prompt"], parameters["summary_pre_prompt"], parameters["conversation_pre_prompt"], parameters["example_pre_prompt"] = (str) all of the pre-prompts, separated
|
| 32 |
+
parameters["combined_pre_prompt"] = (str) concatenated individual pre-prompts
|
| 33 |
+
|
| 34 |
+
Returns:
|
| 35 |
+
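3 processed data-filled JSON objects: farm_pretty_json, interactions_pretty_json, trial_pretty_json. If generation fails, a dict of the form {"error": "Failed to generate valid JSON. ..."} is returned instead.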
|
| 36 |
"""
|
| 37 |
|
| 38 |
input_text = input_data["input_text"]
|
|
|
|
| 48 |
trial_prompt = input_data["input_context"] + trial_prompt
|
| 49 |
|
| 50 |
|
|
|
|
| 51 |
try:
|
| 52 |
#Call OpenAI API to generate structured output based on prompt
|
| 53 |
|
|
|
|
| 108 |
return {"error": "Failed to generate valid JSON. " + str(e)}
|
| 109 |
|
| 110 |
# This is for the step-wise JSON creation
|
| 111 |
+
def generate_json_pieces(input_data, parameters):
|
| 112 |
+
"""
|
| 113 |
+
This is primarily for one of the flippers: it creates each JSON section individually, then concatenates them all together.
|
| 114 |
+
The hypothesis is that individual calls to the model will be more robust than giving the model all the data at once.
|
| 115 |
+
|
| 116 |
+
Args:
|
| 117 |
+
|
| 118 |
+
input_data: (dict) The input data, preprocessed, from the user. Aka what will fill in the JSON
|
| 119 |
+
input_data["input_text"] = (str) the preprocessed input text
|
| 120 |
+
input_data["input_context"] = (str) depending on levers, empty or what is put in front of the prompt
|
| 121 |
+
input_data["input_text_pieces"] = (dict) containing the individual split up prompt pieces: field_data_input, planting_data_input, logs_data_input, soil_data_input, yield_data_input
|
| 122 |
+
|
| 123 |
+
parameters: (dict) All of the individual parameters and "flippers"
|
| 124 |
+
parameters["model_version"] = (str) what model should be used
|
| 125 |
+
parameters["chaining"] = (bool) whether or not the preprocessed input context should be chained (given to multiple models)
|
| 126 |
+
parameters["context_pre_prompt"], parameters["summary_pre_prompt"], parameters["conversation_pre_prompt"], parameters["example_pre_prompt"] = (str) all of the pre-prompts, separated
|
| 127 |
+
parameters["combined_pre_prompt"] = (str) concatenated individual pre-prompts
|
| 128 |
+
parameters["additional_json_pieces_options"] = (str) "Explicit specific pieces" or "Parse from one big input text" to indicate whether it's many function calls on one input text or many function calls on smaller pieces of input texts
|
| 129 |
+
|
| 130 |
+
Returns:
|
| 131 |
+
(str - json) A final combined JSON containing the data-filled schema for Farm Activities
|
| 132 |
+
"""
|
| 133 |
+
specification = input_data["input_text"]
|
| 134 |
+
model_version = parameters["model_version"]
|
| 135 |
+
additional_json_creation_options = parameters["additional_json_pieces_options"]
|
| 136 |
+
field_data_input = input_data["input_text_pieces"]["field_data_input"]
|
| 137 |
+
planting_data_input = input_data["input_text_pieces"]["planting_data_input"]
|
| 138 |
+
logs_data_input = input_data["input_text_pieces"]["logs_data_input"]
|
| 139 |
+
soil_data_input = input_data["input_text_pieces"]["soil_data_input"]
|
| 140 |
+
yield_data_input = input_data["input_text_pieces"]["yield_data_input"]
|
| 141 |
|
| 142 |
if additional_json_creation_options == "Explicit specific pieces":
|
| 143 |
field_data_specification = field_data_input
|
|
|
|
| 231 |
# return output1, output2, output3
|
| 232 |
|
| 233 |
def pre_processing(input_data, parameters):
|
| 234 |
+
"""
|
| 235 |
+
In the event there's a pre-prompt, process the pre-prompts and input text accordingly
|
| 236 |
+
|
| 237 |
+
Args:
|
| 238 |
+
input_data: (dict) The input data, preprocessed, from the user. Aka what will fill in the JSON
|
| 239 |
+
input_data["input_text"] = (str) the preprocessed input text
|
| 240 |
+
input_data["input_context"] = (str) depending on levers, empty or what is put in front of the prompt
|
| 241 |
+
input_data["input_text_pieces"] = (dict) containing the individual split up prompt pieces: field_data_input, planting_data_input, logs_data_input, soil_data_input, yield_data_input
|
| 242 |
+
|
| 243 |
+
parameters: (dict) All of the individual parameters and "flippers"
|
| 244 |
+
parameters["model_version"] = (str) what model should be used
|
| 245 |
+
parameters["chaining"] = (bool) whether or not the preprocessed input context should be chained (given to multiple models)
|
| 246 |
+
parameters["context_pre_prompt"], parameters["summary_pre_prompt"], parameters["conversation_pre_prompt"], parameters["example_pre_prompt"] = (str) all of the pre-prompts, separated
|
| 247 |
+
parameters["combined_pre_prompt"] = (str) concatenated individual pre-prompts
|
| 248 |
+
parameters["additional_json_pieces_options"] = (str) "Explicit specific pieces" or "Parse from one big input text" to indicate whether it's many function calls on one input text or many function calls on smaller pieces of input texts
|
| 249 |
+
|
| 250 |
+
Returns:
|
| 251 |
+
(dict) input_data
|
| 252 |
+
input_data["input_context"] = (bool) whether the input text should be used as context or not
|
| 253 |
+
input_data["input_text"] = (str) input text
|
| 254 |
+
"""
|
| 255 |
|
| 256 |
if parameters["chaining"]:
|
| 257 |
|
|
|
|
| 301 |
|
| 302 |
|
| 303 |
def process_specifications(input_data, parameters):
|
| 304 |
+
"""
|
| 305 |
+
Once the parameters and data are processed, do the pre-processing and then generate JSONs
|
| 306 |
+
|
| 307 |
+
Args:
|
| 308 |
+
input_data: (dict) The input data, preprocessed, from the user. Aka what will fill in the JSON
|
| 309 |
+
input_data["input_text"] = (str) the preprocessed input text
|
| 310 |
+
input_data["input_context"] = (str) depending on levers, empty or what is put in front of the prompt
|
| 311 |
+
input_data["input_text_pieces"] = (dict) containing the individual split up prompt pieces: field_data_input, planting_data_input, logs_data_input, soil_data_input, yield_data_input
|
| 312 |
+
|
| 313 |
+
parameters: (dict) All of the individual parameters and "flippers"
|
| 314 |
+
parameters["pre_prompt"] = (bool) whether or not there is a pre-prompt to process through pre_processing()
|
| 315 |
+
parameters["model_version"] = (str) what model should be used
|
| 316 |
+
parameters["chaining"] = (bool) whether or not the preprocessed input context should be chained (given to multiple models)
|
| 317 |
+
parameters["context_pre_prompt"], parameters["summary_pre_prompt"], parameters["conversation_pre_prompt"], parameters["example_pre_prompt"] = (str) all of the pre-prompts, separated
|
| 318 |
+
parameters["combined_pre_prompt"] = (str) concatenated individual pre-prompts
|
| 319 |
+
parameters["additional_json_pieces_options"] = (str) "Explicit specific pieces" or "Parse from one big input text" to indicate whether it's many function calls on one input text or many function calls on smaller pieces of input texts
|
| 320 |
+
|
| 321 |
+
Returns:
|
| 322 |
+
3 processed data-filled JSON objects: farm_pretty_json, interactions_pretty_json, trial_pretty_json
|
| 323 |
+
"""
|
| 324 |
# here is where parsing and other things will happen before
|
| 325 |
if parameters["pre_prompt"] == True:
|
| 326 |
processed_input = pre_processing(input_data, parameters)
|
|
|
|
| 332 |
|
| 333 |
|
| 334 |
def parse_survey_stack_parameters(data):
|
| 335 |
+
"""
|
| 336 |
+
Parse the incoming parameters from the parameter survey
|
| 337 |
+
|
| 338 |
+
Args:
|
| 339 |
+
data: (json) JSON retrieved from surveystack API after retrieving survey info/details
|
| 340 |
+
|
| 341 |
+
Returns:
|
| 342 |
+
processed_data (dict)
|
| 343 |
+
processed_data["pre_prompt"] = (bool) whether or not there is a pre-prompt to process through pre_processing()
|
| 344 |
+
processed_data["model_version"] = (str) what model should be used
|
| 345 |
+
processed_data["chaining"] = (bool) whether or not the preprocessed input context should be chained (given to multiple models)
|
| 346 |
+
processed_data["context_pre_prompt"], processed_data["summary_pre_prompt"], processed_data["conversation_pre_prompt"], processed_data["example_pre_prompt"] = (str) all of the pre-prompts, separated
|
| 347 |
+
processed_data["combined_pre_prompt"] = (str) concatenated individual pre-prompts
|
| 348 |
+
processed_data["additional_json_pieces_options"] = (str) "Explicit specific pieces" or "Parse from one big input text" to indicate whether it's many function calls on one input text or many function calls on smaller pieces of input texts
|
| 349 |
+
"""
|
| 350 |
processed_data = {}
|
| 351 |
|
| 352 |
processed_data["model_version"] = data[0]['data']['modelversion']['value'][0]
|
|
|
|
| 381 |
])
|
| 382 |
)
|
| 383 |
processed_data["chaining"] = False
|
| 384 |
+
processed_data["combined_pre_prompt"] = combined_prompt
|
| 385 |
else:
|
| 386 |
# Set combined_pre_prompt to None if chaining is enabled
|
| 387 |
processed_data["chaining"] = True
|
|
|
|
| 402 |
return processed_data
|
| 403 |
|
| 404 |
def parse_survey_stack_data(data):
|
| 405 |
+
"""
|
| 406 |
+
Parse the incoming data from the survey stack survey
|
| 407 |
+
|
| 408 |
+
Args:
|
| 409 |
+
data: (json) JSON retrieved from surveystack API after retrieving survey info/details
|
| 410 |
+
|
| 411 |
+
Returns:
|
| 412 |
+
processed_data
|
| 413 |
+
processed_data["input_text"] = (str) the raw input text
|
| 414 |
+
"""
|
| 415 |
processed_data = {}
|
| 416 |
|
| 417 |
processed_data["input_text"] = data[0]['data']['inputtext']['value']
|