rosemariafontana commited on
Commit
915b500
·
verified ·
1 Parent(s): 9eec8db

Update process_data.py

Browse files
Files changed (1) hide show
  1. process_data.py +114 -6
process_data.py CHANGED
@@ -9,8 +9,10 @@ import json
9
 
10
  from schema_classes import FarmActivities, Interactions, Trial, FarmActivitiesLite, PlantingLite, Log, Soil, Yield
11
 
12
- # adding comment
13
- # Chatbot model
 
 
14
  os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")
15
  client = OpenAI()
16
 
@@ -18,6 +20,19 @@ client = OpenAI()
18
  def generate_json(input_data, parameters):
19
  """
20
  Function to prompt OpenAI API to generate structured JSON output.
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  """
22
 
23
  input_text = input_data["input_text"]
@@ -33,7 +48,6 @@ def generate_json(input_data, parameters):
33
  trial_prompt = input_data["input_context"] + trial_prompt
34
 
35
 
36
-
37
  try:
38
  #Call OpenAI API to generate structured output based on prompt
39
 
@@ -94,7 +108,36 @@ def generate_json(input_data, parameters):
94
  return {"error": "Failed to generate valid JSON. " + str(e)}
95
 
96
  # This is for the step-wise JSON creation
97
- def generate_json_pieces(specification, model_version, additional_json_creation_options, field_data_input, planting_data_input, logs_data_input, soil_data_input, yield_data_input):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98
 
99
  if additional_json_creation_options == "Explicit specific pieces":
100
  field_data_specification = field_data_input
@@ -188,7 +231,27 @@ def generate_json_pieces(specification, model_version, additional_json_creation_
188
  # return output1, output2, output3
189
 
190
  def pre_processing(input_data, parameters):
191
- # in the event there's a pre-prompt, process
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
192
 
193
  if parameters["chaining"]:
194
 
@@ -238,6 +301,26 @@ def pre_processing(input_data, parameters):
238
 
239
 
240
  def process_specifications(input_data, parameters):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
241
  # here is where parsing and other things will happen before
242
  if parameters["pre_prompt"] == True:
243
  processed_input = pre_processing(input_data, parameters)
@@ -249,6 +332,21 @@ def process_specifications(input_data, parameters):
249
 
250
 
251
  def parse_survey_stack_parameters(data):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
252
  processed_data = {}
253
 
254
  processed_data["model_version"] = data[0]['data']['modelversion']['value'][0]
@@ -283,7 +381,7 @@ def parse_survey_stack_parameters(data):
283
  ])
284
  )
285
  processed_data["chaining"] = False
286
- processed_data["combined_prompt"] = combined_prompt
287
  else:
288
  # Set combined_pre_prompt to None if chaining is enabled
289
  processed_data["chaining"] = True
@@ -304,6 +402,16 @@ def parse_survey_stack_parameters(data):
304
  return processed_data
305
 
306
  def parse_survey_stack_data(data):
 
 
 
 
 
 
 
 
 
 
307
  processed_data = {}
308
 
309
  processed_data["input_text"] = data[0]['data']['inputtext']['value']
 
9
 
10
  from schema_classes import FarmActivities, Interactions, Trial, FarmActivitiesLite, PlantingLite, Log, Soil, Yield
11
 
12
+
13
+ # This API key must be in a "secret" in your environment. This is generated from OpenAI or the company's website that creates the model you wish to engage with.
14
+ # To use other models, some other endpoints would need to slightly change
15
+ # As is, the endpoint used requires a model that is capable of OpenAI's structured outputs.
16
  os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")
17
  client = OpenAI()
18
 
 
20
  def generate_json(input_data, parameters):
21
  """
22
  Function to prompt OpenAI API to generate structured JSON output.
23
+
24
+ Args:
25
+ input_data: (dict) The input data, preprocessed, from the user. Aka what will fill in the JSON
26
+ input_data["input_text"] = the preprocessed input text
27
+ input_data["input_context"] = depending on levers, empty or what is put in front of the prompt
28
+ parameters: (dict) All of the individual parameters and "flippers"
29
+ parameters["model_version"] = (str) what model should be used
30
+ parameters["chaining"] = (bool) whether or not the preprocessed input context should be chained (given to multiple models)
31
+ parameters["context_pre_prompt"], parameters["summary_pre_prompt"], parameters["conversation_pre_prompt"], parameters["example_pre_prompt"] = (str) all of the pre-prompts, separated
32
+ parameters["combined_pre_prompt"] = (str) concatenated individual pre-prompts
33
+
34
+ Returns:
35
+ 3 processed data-filled JSON objects: farm_pretty_json, interactions_pretty_json, trial_pretty_json
36
  """
37
 
38
  input_text = input_data["input_text"]
 
48
  trial_prompt = input_data["input_context"] + trial_prompt
49
 
50
 
 
51
  try:
52
  #Call OpenAI API to generate structured output based on prompt
53
 
 
108
  return {"error": "Failed to generate valid JSON. " + str(e)}
109
 
110
  # This is for the step-wise JSON creation
111
+ def generate_json_pieces(input_data, parameters):
112
+ """
113
+ This is primarily for one of the flippers, which allows each individual JSON section to be created individually, then concatenates them all together.
114
+ It is proposed that perhaps the individual calls to the model will be more robust than giving the model all the data at once.
115
+
116
+ Args:
117
118
+ input_data: (dict) The input data, preprocessed, from the user. Aka what will fill in the JSON
119
+ input_data["input_text"] = (str) the preprocessed input text
120
+ input_data["input_context"] = (str) depending on levers, empty or what is put in front of the prompt
121
+ input_data["input_text_pieces"] = (dict) containing the individual split up prompt pieces: field_data_input, planting_data_input, logs_data_input, soil_data_input, yield_data_input
122
+
123
+ parameters: (dict) All of the individual parameters and "flippers"
124
+ parameters["model_version"] = (str) what model should be used
125
+ parameters["chaining"] = (bool) whether or not the preprocessed input context should be chained (given to multiple models)
126
+ parameters["context_pre_prompt"], parameters["summary_pre_prompt"], parameters["conversation_pre_prompt"], parameters["example_pre_prompt"] = (str) all of the pre-prompts, separated
127
+ parameters["combined_pre_prompt"] = (str) concatenated individual pre-prompts
128
+ parameters["additional_json_pieces_options"] = (str) "Explicit specific pieces" or "Parse from one big input text" to indicate whether it's many function calls on one input text or many function calls on smaller pieces of input texts
129
+
130
+ Returns:
131
+ (str - json) A final combined JSON containing the data filled schema for Farm Activities
132
+ """
133
+ specification = input_data["input_text"]
134
+ model_version = parameters["model_version"]
135
+ additional_json_creation_options = parameters["additional_json_pieces_options"]
136
+ field_data_input = input_data["input_text_pieces"]["field_data_input"]
137
+ planting_data_input = input_data["input_text_pieces"]["planting_data_input"]
138
+ logs_data_input = input_data["input_text_pieces"]["logs_data_input"]
139
+ soil_data_input = input_data["input_text_pieces"]["soil_data_input"]
140
+ yield_data_input = input_data["input_text_pieces"]["yield_data_input"]
141
 
142
  if additional_json_creation_options == "Explicit specific pieces":
143
  field_data_specification = field_data_input
 
231
  # return output1, output2, output3
232
 
233
  def pre_processing(input_data, parameters):
234
+ """
235
+ In the event there's a pre-prompt, process the pre-prompts and input text accordingly
236
+
237
+ Args:
238
+ input_data: (dict) The input data, preprocessed, from the user. Aka what will fill in the JSON
239
+ input_data["input_text"] = (str) the preprocessed input text
240
+ input_data["input_context"] = (str) depending on levers, empty or what is put in front of the prompt
241
+ input_data["input_text_pieces"] = (dict) containing the individual split up prompt pieces: field_data_input, planting_data_input, logs_data_input, soil_data_input, yield_data_input
242
+
243
+ parameters: (dict) All of the individual parameters and "flippers"
244
+ parameters["model_version"] = (str) what model should be used
245
+ parameters["chaining"] = (bool) whether or not the preprocessed input context should be chained (given to multiple models)
246
+ parameters["context_pre_prompt"], parameters["summary_pre_prompt"], parameters["conversation_pre_prompt"], parameters["example_pre_prompt"] = (str) all of the pre-prompts, separated
247
+ parameters["combined_pre_prompt"] = (str) concatenated individual pre-prompts
248
+ parameters["additional_json_pieces_options"] = (str) "Explicit specific pieces" or "Parse from one big input text" to indicate whether it's many function calls on one input text or many function calls on smaller pieces of input texts
249
+
250
+ Returns:
251
+ (dict) input_data
252
+ input_data["input_context"] = (bool) whether the input text should be used as context or not
253
+ input_data["input_text"] = (str) input text
254
+ """
255
 
256
  if parameters["chaining"]:
257
 
 
301
 
302
 
303
  def process_specifications(input_data, parameters):
304
+ """
305
+ Once the parameters and data are processed, do the pre-processing and then generate JSONs
306
+
307
+ Args:
308
+ input_data: (dict) The input data, preprocessed, from the user. Aka what will fill in the JSON
309
+ input_data["input_text"] = (str) the preprocessed input text
310
+ input_data["input_context"] = (str) depending on levers, empty or what is put in front of the prompt
311
+ input_data["input_text_pieces"] = (dict) containing the individual split up prompt pieces: field_data_input, planting_data_input, logs_data_input, soil_data_input, yield_data_input
312
+
313
+ parameters: (dict) All of the individual parameters and "flippers"
314
+ parameters["pre_prompt"] = (bool) whether or not there is a pre-prompt to process through pre_processing()
315
+ parameters["model_version"] = (str) what model should be used
316
+ parameters["chaining"] = (bool) whether or not the preprocessed input context should be chained (given to multiple models)
317
+ parameters["context_pre_prompt"], parameters["summary_pre_prompt"], parameters["conversation_pre_prompt"], parameters["example_pre_prompt"] = (str) all of the pre-prompts, separated
318
+ parameters["combined_pre_prompt"] = (str) concatenated individual pre-prompts
319
+ parameters["additional_json_pieces_options"] = (str) "Explicit specific pieces" or "Parse from one big input text" to indicate whether it's many function calls on one input text or many function calls on smaller pieces of input texts
320
+
321
+ Returns:
322
+ 3 processed data-filled JSON objects: farm_pretty_json, interactions_pretty_json, trial_pretty_json
323
+ """
324
  # here is where parsing and other things will happen before
325
  if parameters["pre_prompt"] == True:
326
  processed_input = pre_processing(input_data, parameters)
 
332
 
333
 
334
  def parse_survey_stack_parameters(data):
335
+ """
336
+ Parse the incoming parameters from the parameter survey
337
+
338
+ Args:
339
+ data: (json) JSON retrieved from surveystack API after retrieving survey info/details
340
+
341
+ Returns:
342
+ processed_data (dict)
343
+ processed_data["pre_prompt"] = (bool) whether or not there is a pre-prompt to process through pre_processing()
344
+ processed_data["model_version"] = (str) what model should be used
345
+ processed_data["chaining"] = (bool) whether or not the preprocessed input context should be chained (given to multiple models)
346
+ processed_data["context_pre_prompt"], processed_data["summary_pre_prompt"], processed_data["conversation_pre_prompt"], processed_data["example_pre_prompt"] = (str) all of the pre-prompts, separated
347
+ processed_data["combined_pre_prompt"] = (str) concatenated individual pre-prompts
348
+ processed_data["additional_json_pieces_options"] = (str) "Explicit specific pieces" or "Parse from one big input text" to indicate whether it's many function calls on one input text or many function calls on smaller pieces of input texts
349
+ """
350
  processed_data = {}
351
 
352
  processed_data["model_version"] = data[0]['data']['modelversion']['value'][0]
 
381
  ])
382
  )
383
  processed_data["chaining"] = False
384
+ processed_data["combined_pre_prompt"] = combined_prompt
385
  else:
386
  # Set combined_pre_prompt to None if chaining is enabled
387
  processed_data["chaining"] = True
 
402
  return processed_data
403
 
404
  def parse_survey_stack_data(data):
405
+ """
406
+ Parse the incoming data from the survey stack survey
407
+
408
+ Args:
409
+ data: (json) JSON retrieved from surveystack API after retrieving survey info/details
410
+
411
+ Returns:
412
+ processed_data
413
+ processed_data["input_text"] = (str) the raw input text
414
+ """
415
  processed_data = {}
416
 
417
  processed_data["input_text"] = data[0]['data']['inputtext']['value']