rosemariafontana committed on
Commit
b3c2c53
·
verified ·
1 Parent(s): 2a00c62

Update process_data.py

Browse files
Files changed (1) hide show
  1. process_data.py +90 -12
process_data.py CHANGED
@@ -20,13 +20,25 @@ def generate_json(input_data, model_version):
20
  Function to prompt OpenAI API to generate structured JSON output.
21
  """
22
 
 
 
 
 
 
 
 
 
 
 
 
23
  try:
24
  #Call OpenAI API to generate structured output based on prompt
 
25
  farm_info_response = client.beta.chat.completions.parse(
26
  model=model_version, # Use GPT model that supports structured output
27
  messages=[
28
- {"role": "system", "content": "Extract the farm information."},
29
- {"role": "user", "content": input_data}
30
  ],
31
  response_format=FarmActivities,
32
  )
@@ -35,16 +47,18 @@ def generate_json(input_data, model_version):
35
  raise ValueError(f"API error: {interactions_response['error']['message']}")
36
 
37
  farm_generated_json = farm_info_response.choices[0].message.parsed
 
 
38
  print("FARM JSON: ")
39
  print(farm_generated_json) # debugging
40
-
41
  farm_pretty_json = farm_generated_json.json()
42
 
 
43
  interactions_response = client.beta.chat.completions.parse(
44
  model=model_version, # Use GPT model that supports structured output
45
  messages=[
46
- {"role": "system", "content": "Extract the interactions information."},
47
- {"role": "user", "content": specification}
48
  ],
49
  response_format=Interactions,
50
  )
@@ -56,15 +70,14 @@ def generate_json(input_data, model_version):
56
 
57
  print("INTERACTIONS JSON: ")
58
  print(interactions_generated_json) # debugging 2
59
-
60
  interactions_pretty_json = interactions_generated_json.json()
61
 
62
 
63
  trial_response = client.beta.chat.completions.parse(
64
  model=model_version, # Use GPT model that supports structured output
65
  messages=[
66
- {"role": "system", "content": "Extract the trial information."},
67
- {"role": "user", "content": specification}
68
  ],
69
  response_format=Trial,
70
  )
@@ -126,7 +139,7 @@ def generate_json_pieces(specification, model_version, additional_json_creation_
126
  log_response = client.beta.chat.completions.parse(
127
  model=model_version, # Use GPT model that supports structured output
128
  messages=[
129
- {"role": "system", "content": "Extract the planting information."},
130
  {"role": "user", "content": logs_data_specification}
131
  ],
132
  response_format=Log,
@@ -135,7 +148,7 @@ def generate_json_pieces(specification, model_version, additional_json_creation_
135
  soil_response = client.beta.chat.completions.parse(
136
  model=model_version, # Use GPT model that supports structured output
137
  messages=[
138
- {"role": "system", "content": "Extract the planting information."},
139
  {"role": "user", "content": soil_data_specification}
140
  ],
141
  response_format=Soil,
@@ -144,7 +157,7 @@ def generate_json_pieces(specification, model_version, additional_json_creation_
144
  yield_response = client.beta.chat.completions.parse(
145
  model=model_version, # Use GPT model that supports structured output
146
  messages=[
147
- {"role": "system", "content": "Extract the planting information."},
148
  {"role": "user", "content": yield_data_specification}
149
  ],
150
  response_format=Yield,
@@ -186,15 +199,80 @@ def generate_json_pieces(specification, model_version, additional_json_creation_
186
 
187
  # return output1, output2, output3
188
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
189
  def process_specifications(input_data, parameters):
190
  # here is where parsing and other things will happen before
191
- return generate_json(input_data["input_text"], parameters["model_version"])
 
 
 
 
192
 
193
 
194
  def parse_survey_stack_parameters(data):
195
  processed_data = {}
196
 
197
  processed_data["model_version"] = data[0]['data']['modelversion']['value'][0]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
198
 
199
  return processed_data
200
 
 
20
  Function to prompt OpenAI API to generate structured JSON output.
21
  """
22
 
23
+ input_text = input_data["input_text"]
24
+
25
+ farm_prompt = "Extract the farm information."
26
+ interactions_prompt = "Extract the interactions information."
27
+ trial_prompt = "Extract the trial information."
28
+
29
+ if input_data["input_context"]:
30
+ farm_prompt = input_data["input_context"] + farm_prompt
31
+ interactions_prompt = input_data["input_context"] + interactions_prompt
32
+ trial_prompt = input_data["input_context"] + trial_prompt
33
+
34
  try:
35
  #Call OpenAI API to generate structured output based on prompt
36
+
37
  farm_info_response = client.beta.chat.completions.parse(
38
  model=model_version, # Use GPT model that supports structured output
39
  messages=[
40
+ {"role": "system", "content": farm_prompt},
41
+ {"role": "user", "content": input_text}
42
  ],
43
  response_format=FarmActivities,
44
  )
 
47
  raise ValueError(f"API error: {interactions_response['error']['message']}")
48
 
49
  farm_generated_json = farm_info_response.choices[0].message.parsed
50
+
51
+
52
  print("FARM JSON: ")
53
  print(farm_generated_json) # debugging
 
54
  farm_pretty_json = farm_generated_json.json()
55
 
56
+
57
  interactions_response = client.beta.chat.completions.parse(
58
  model=model_version, # Use GPT model that supports structured output
59
  messages=[
60
+ {"role": "system", "content": interactions_prompt},
61
+ {"role": "user", "content": input_text}
62
  ],
63
  response_format=Interactions,
64
  )
 
70
 
71
  print("INTERACTIONS JSON: ")
72
  print(interactions_generated_json) # debugging 2
 
73
  interactions_pretty_json = interactions_generated_json.json()
74
 
75
 
76
  trial_response = client.beta.chat.completions.parse(
77
  model=model_version, # Use GPT model that supports structured output
78
  messages=[
79
+ {"role": "system", "content": trial_prompt},
80
+ {"role": "user", "content": input_text}
81
  ],
82
  response_format=Trial,
83
  )
 
139
  log_response = client.beta.chat.completions.parse(
140
  model=model_version, # Use GPT model that supports structured output
141
  messages=[
142
+ {"role": "system", "content": "Extract the log information."},
143
  {"role": "user", "content": logs_data_specification}
144
  ],
145
  response_format=Log,
 
148
  soil_response = client.beta.chat.completions.parse(
149
  model=model_version, # Use GPT model that supports structured output
150
  messages=[
151
+ {"role": "system", "content": "Extract the soil information."},
152
  {"role": "user", "content": soil_data_specification}
153
  ],
154
  response_format=Soil,
 
157
  yield_response = client.beta.chat.completions.parse(
158
  model=model_version, # Use GPT model that supports structured output
159
  messages=[
160
+ {"role": "system", "content": "Extract the yield information."},
161
  {"role": "user", "content": yield_data_specification}
162
  ],
163
  response_format=Yield,
 
199
 
200
  # return output1, output2, output3
201
 
202
def pre_processing(input_data, parameters):
    """Apply any configured pre-prompts to the raw input before extraction.

    When chaining is enabled, each non-empty pre-prompt is sent to the model
    in turn and the (JSON-parsed, when possible) response becomes the input
    for the next round.  When chaining is disabled, the pre-prompts were
    already merged into one context string, which is attached to the input
    so generate_json can prepend it to its extraction prompts.

    Args:
        input_data: dict with at least "input_text"; mutated in place to
            carry "input_context" (context string, or False when absent).
        parameters: processed survey parameters from
            parse_survey_stack_parameters ("model_version", "chaining",
            the four "*_pre_prompt" entries, "combined_prompt").

    Returns:
        The (mutated) input_data dict.
    """
    if parameters["chaining"]:
        input_text = input_data["input_text"]
        # Skip pre-prompts the survey left empty (None / "") so we never
        # send an empty system message.
        pre_prompts = [p for p in (
            parameters["context_pre_prompt"],
            parameters["summary_pre_prompt"],
            parameters["conversation_pre_prompt"],
            parameters["example_pre_prompt"],
        ) if p]

        for pre_prompt in pre_prompts:
            response = client.chat.completions.create(
                model=parameters["model_version"],
                messages=[
                    {"role": "system", "content": pre_prompt},
                    {"role": "user", "content": input_text}
                ]
            )
            # The openai v1 client returns objects, not dicts — matches the
            # .choices[0].message usage elsewhere in this file.
            response_text = response.choices[0].message.content

            try:
                # Feed the parsed intermediate result into the next round.
                input_text = json.loads(response_text)
            except (ValueError, TypeError) as e:
                # Malformed response: keep the previous input_text.
                print("Failed to parse response as JSON. Error was:")
                print(e)

        input_data["input_context"] = False
        input_data["input_text"] = input_text
        return input_data
    else:
        # Quote styles kept distinct: the embedded lookup uses single
        # quotes so this also parses on Python < 3.12.
        input_context = (
            "You are processing farm activity, interactions, and trial data. "
            f"Here's important context of the data {parameters['combined_prompt']}. "
            "With this context in mind, "
        )
        input_data["input_context"] = input_context
        return input_data
237
+
238
+
239
+
240
def process_specifications(input_data, parameters):
    """Run optional pre-prompt processing, then generate the structured JSON.

    Args:
        input_data: dict with "input_text" (raw survey text); gains an
            "input_context" entry before being handed to generate_json.
        parameters: processed survey parameters; "pre_prompt" selects
            whether pre-processing runs first, "model_version" picks the
            OpenAI model.

    Returns:
        Whatever generate_json returns for the (possibly pre-processed)
        input.
    """
    # here is where parsing and other things will happen before
    if parameters["pre_prompt"]:
        input_data = pre_processing(input_data, parameters)
    else:
        # generate_json indexes input_data["input_text"] and
        # input_data["input_context"], so it needs the full dict with a
        # falsy context — passing the bare text string would break it.
        input_data["input_context"] = False
    return generate_json(input_data, parameters["model_version"])
247
 
248
 
249
def parse_survey_stack_parameters(data):
    """Flatten the survey-stack payload into a parameter dict.

    Args:
        data: survey-stack response — a list whose first element holds a
            'data' mapping of question-name -> {'value': ...} entries.

    Returns:
        dict with "model_version", the "pre_prompt" flag, the four
        individual pre-prompts, the "chaining" flag, and
        "combined_prompt" (single merged prompt when chaining is off,
        else None).
    """
    survey = data[0]['data']

    processed_data = {}
    processed_data["model_version"] = survey['modelversion']['value'][0]

    if survey['preprompt']['value'][0] == 'continue_preprompts':
        processed_data["pre_prompt"] = True
        processed_data["context_pre_prompt"] = survey['contextpreprompt']['value']
        processed_data["summary_pre_prompt"] = survey['summarypreprompt']['value']
        processed_data["conversation_pre_prompt"] = survey['conversationpreprompt']['value']
        processed_data["example_pre_prompt"] = survey['examplepreprompt']['value']

        if survey['prepromptchaining']['value'][0] == "no":
            # Merge the non-empty pre-prompts into one prompt string.
            processed_data["combined_prompt"] = " ".join(filter(None, [
                processed_data["context_pre_prompt"],
                processed_data["summary_pre_prompt"],
                processed_data["conversation_pre_prompt"],
                processed_data["example_pre_prompt"],
            ]))
            processed_data["chaining"] = False
        else:
            processed_data["chaining"] = True
            # Key renamed from "combined_pre_prompt" (a typo nothing ever
            # read) so every branch exposes the same "combined_prompt" key.
            processed_data["combined_prompt"] = None
    else:
        # No pre-prompting requested: still fill every key so readers can
        # rely on their presence.
        processed_data["pre_prompt"] = False
        for key in ("context_pre_prompt", "summary_pre_prompt",
                    "conversation_pre_prompt", "example_pre_prompt",
                    "chaining", "combined_prompt"):
            processed_data[key] = None

    return processed_data
278