rosemariafontana committed on
Commit
ca500b1
·
verified ·
1 Parent(s): 4ef58c6

Update process_data.py

Browse files
Files changed (1) hide show
  1. process_data.py +140 -18
process_data.py CHANGED
@@ -151,6 +151,12 @@ def generate_json_pieces(input_data, parameters):
151
  log_data_input = input_data["input_text_pieces"]["pre_processed_pieces"]["log_data_input"]
152
  soil_data_input = input_data["input_text_pieces"]["pre_processed_pieces"]["soil_data_input"]
153
  yield_data_input = input_data["input_text_pieces"]["pre_processed_pieces"]["yield_data_input"]
 
 
 
 
 
 
154
  else:
155
  print("Pre prompt is false")
156
  field_data_input = input_data["input_text_pieces"]["field_data_input"]
@@ -159,7 +165,14 @@ def generate_json_pieces(input_data, parameters):
159
  soil_data_input = input_data["input_text_pieces"]["soil_data_input"]
160
  yield_data_input = input_data["input_text_pieces"]["yield_data_input"]
161
 
 
 
 
 
 
 
162
 
 
163
  print("Setting prompts")
164
  field_prompt = "Extract the field information."
165
  plant_prompt = "Extract the planting information."
@@ -167,11 +180,13 @@ def generate_json_pieces(input_data, parameters):
167
  soil_prompt = "Extract the soil information."
168
  yield_prompt = "Extract the yield information."
169
 
170
- # fix this part
171
-
172
- # figure out what happens when there's
173
- # chaining, pre-prompts, context, etc ....
174
-
 
 
175
  if parameters["combined_pre_prompt"]:
176
  field_prompt = parameters["combined_pre_prompt"] + field_prompt
177
  plant_prompt = parameters["combined_pre_prompt"] + plant_prompt
@@ -179,6 +194,12 @@ def generate_json_pieces(input_data, parameters):
179
  soil_prompt = parameters["combined_pre_prompt"] + soil_prompt
180
  yield_prompt = parameters["combined_pre_prompt"] + yield_prompt
181
 
 
 
 
 
 
 
182
  try:
183
  # Call OpenAI API to generate structured output based on prompt
184
  print("Getting all responses in pieces, starting with field response")
@@ -292,9 +313,114 @@ def generate_json_pieces(input_data, parameters):
292
 
293
 
294
  # This is for the second schema now, interactions
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
295
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
296
 
297
- return final_pretty_farm_activity_json, final_pretty_json, final_pretty_json
298
  except Exception as e:
299
  return {"error": "Failed to generate valid JSON. " + str(e)}
300
 
@@ -551,16 +677,14 @@ def parse_survey_stack_data(data):
551
  print("NEXT SCHEMA INPUTS")
552
  interactions_inputs = data[0]['data']['group_5']
553
  print("INTERACTIONS INPUTS" + str(interactions_inputs))
554
- processed_data["input_text_pieces_second_schema"] = {}
555
- processed_data["input_text_pieces_second_schema"]["interaction_data_input"] = interactions_inputs.get('interaction_data_input', {}).get('value', None)
556
- processed_data["input_text_pieces_second_schema"]["person_data_input"] = interactions_inputs.get('person_data_input', {}).get('value', None)
557
 
558
  print("NEXT SCHEMA INPUTS 2")
559
  trials_inputs = data[0]['data']['group_6']
560
  print("TRIALS INPUTS" + str(trials_inputs))
561
- processed_data["input_text_pieces_third_schema"] = {}
562
- processed_data["input_text_pieces_third_schema"]["trial_data_input"] = trials_inputs.get('trial_data_input', {}).get('value', None)
563
- processed_data["input_text_pieces_third_schema"]["treatment_data_input"] = trials_inputs.get('treatment_data_input', {}).get('value', None)
564
 
565
 
566
  elif processed_data["stepwise_json_creation"][0] == "singlejsoncreation":
@@ -575,13 +699,11 @@ def parse_survey_stack_data(data):
575
  processed_data["input_text_pieces"]["soil_data_input"] = "EMPTY"
576
  processed_data["input_text_pieces"]["yield_data_input"] = "EMPTY"
577
 
578
- processed_data["input_text_pieces_second_schema"] = {}
579
- processed_data["input_text_pieces_second_schema"]["interaction_data_input"] = "EMPTY"
580
- processed_data["input_text_pieces_second_schema"]["person_data_input"] = "EMPTY"
581
 
582
- processed_data["input_text_pieces_third_schema"] = {}
583
- processed_data["input_text_pieces_third_schema"]["trial_data_input"] = "EMPTY"
584
- processed_data["input_text_pieces_third_schema"]["treatment_data_input"] = "EMPTY"
585
 
586
  print("RETURNING DATA")
587
  print(processed_data)
 
151
  log_data_input = input_data["input_text_pieces"]["pre_processed_pieces"]["log_data_input"]
152
  soil_data_input = input_data["input_text_pieces"]["pre_processed_pieces"]["soil_data_input"]
153
  yield_data_input = input_data["input_text_pieces"]["pre_processed_pieces"]["yield_data_input"]
154
+
155
+ interaction_data_input = input_data["input_text_pieces"]["pre_processed_pieces"]["interaction_data_input"]
156
+ person_data_input = input_data["input_text_pieces"]["pre_processed_pieces"]["person_data_input"]
157
+
158
+ trial_data_input = input_data["input_text_pieces"]["pre_processed_pieces"]["trial_data_input"]
159
+ treatment_data_input = input_data["input_text_pieces"]["pre_processed_pieces"]["treatment_data_input"]
160
  else:
161
  print("Pre prompt is false")
162
  field_data_input = input_data["input_text_pieces"]["field_data_input"]
 
165
  soil_data_input = input_data["input_text_pieces"]["soil_data_input"]
166
  yield_data_input = input_data["input_text_pieces"]["yield_data_input"]
167
 
168
+ interaction_data_input = input_data["input_text_pieces"]["interaction_data_input"]
169
+ person_data_input = input_data["input_text_pieces"]["person_data_input"]
170
+
171
+ trial_data_input = input_data["input_text_pieces"]["trial_data_input"]
172
+ treatment_data_input = input_data["input_text_pieces"]["treatment_data_input"]
173
+
174
 
175
+ # Fix these prompts for all
176
  print("Setting prompts")
177
  field_prompt = "Extract the field information."
178
  plant_prompt = "Extract the planting information."
 
180
  soil_prompt = "Extract the soil information."
181
  yield_prompt = "Extract the yield information."
182
 
183
+ interaction_prompt = "Extract the interaction information"
184
+ person_prompt = "Please provide a list of people involved in this interaction, with each person's name, role, and any other relevant details."
185
+
186
+ trial_prompt = "Extract the trial information"
187
+ treatment_prompt = "Please provide a list of different treatments (strips or blocks with the same conditions applied) performed by the partner."
188
+
189
+
190
  if parameters["combined_pre_prompt"]:
191
  field_prompt = parameters["combined_pre_prompt"] + field_prompt
192
  plant_prompt = parameters["combined_pre_prompt"] + plant_prompt
 
194
  soil_prompt = parameters["combined_pre_prompt"] + soil_prompt
195
  yield_prompt = parameters["combined_pre_prompt"] + yield_prompt
196
 
197
+ interaction_prompt = parameters["combined_pre_prompt"] + interaction_prompt
198
+ person_prompt = parameters["combined_pre_prompt"] + person_prompt
199
+
200
+ trial_prompt = parameters["combined_pre_prompt"] + trial_prompt
201
+ treatment_prompt = parameters["combined_pre_prompt"] + treatment_prompt
202
+
203
  try:
204
  # Call OpenAI API to generate structured output based on prompt
205
  print("Getting all responses in pieces, starting with field response")
 
313
 
314
 
315
  # This is for the second schema now, interactions
316
+ print("Interaction prompt")
317
+ print(interaction_prompt)
318
+
319
+ print("Interaction data input")
320
+ print(interaction_data_input)
321
+
322
+ interaction_response = client.beta.chat.completions.parse(
323
+ model=model_version, # Use GPT model that supports structured output
324
+ messages=[
325
+ {"role": "system", "content": interaction_prompt},
326
+ {"role": "user", "content": interaction_data_input}
327
+ ],
328
+ response_format=InteractionsLite,
329
+ )
330
+
331
+ interaction_generated_json = interaction_response.choices[0].message.parsed
332
+
333
+ print("INTERACTION JSON: ")
334
+ interaction_pretty_json = interaction_generated_json.dict()
335
+ print(interaction_pretty_json) # debugging
336
+
337
+ print("Person prompt")
338
+ print(person_prompt)
339
+
340
+ print("Person data input")
341
+ print(person_data_input)
342
+
343
+ interaction_response = client.beta.chat.completions.parse(
344
+ model=model_version, # Use GPT model that supports structured output
345
+ messages=[
346
+ {"role": "system", "content": person_prompt},
347
+ {"role": "user", "content": person_data_input}
348
+ ],
349
+ response_format=Person,
350
+ )
351
+
352
+ person_generated_json = person_response.choices[0].message.parsed
353
 
354
+ print("PERSON JSON: ")
355
+ person_pretty_json = person_generated_json.dict()
356
+ print(person_pretty_json) # debugging
357
+
358
+ interactions = {
359
+ **interaction_pretty_json,
360
+ "people": person_generated_json
361
+ }
362
+
363
+ print("ADDED DICTS 2")
364
+ print(interactions)
365
+ print("FINAL JSON: ")
366
+ final_pretty_interactions_json = json.dumps(interactions, indent=4)
367
+ print(final_pretty_interactions_json)
368
+
369
+ # This is for the third schema now, trials
370
+ print("Trial prompt")
371
+ print(trial_prompt)
372
+
373
+ print("Trial data input")
374
+ print(trial_data_input)
375
+
376
+ trial_response = client.beta.chat.completions.parse(
377
+ model=model_version, # Use GPT model that supports structured output
378
+ messages=[
379
+ {"role": "system", "content": trial_prompt},
380
+ {"role": "user", "content": trial_data_input}
381
+ ],
382
+ response_format=TrialLite,
383
+ )
384
+
385
+ trial_generated_json = trial_response.choices[0].message.parsed
386
+
387
+ print("TRIAL JSON: ")
388
+ trial_pretty_json = trial_generated_json.dict()
389
+ print(trial_pretty_json) # debugging
390
+
391
+ print("Treatment prompt")
392
+ print(treatment_prompt)
393
+
394
+ print("Treatment data input")
395
+ print(treatment_data_input)
396
+
397
+ treatment_response = client.beta.chat.completions.parse(
398
+ model=model_version, # Use GPT model that supports structured output
399
+ messages=[
400
+ {"role": "system", "content": treatment_prompt},
401
+ {"role": "user", "content": treatment_data_input}
402
+ ],
403
+ response_format=Treatment,
404
+ )
405
+
406
+ treatment_generated_json = treatment_response.choices[0].message.parsed
407
+
408
+ print("TREATMENT JSON: ")
409
+ treatment_pretty_json = treatment_generated_json.dict()
410
+ print(treatment_pretty_json) # debugging
411
+
412
+ trials = {
413
+ **trial_pretty_json,
414
+ "treatments": treatment_generated_json
415
+ }
416
+
417
+ print("ADDED DICTS 3")
418
+ print(trials)
419
+ print("TREATMENT JSON: ")
420
+ final_pretty_trials_json = json.dumps(trials, indent=4)
421
+ print(final_pretty_trials_json)
422
 
423
+ return final_pretty_farm_activity_json, final_pretty_interactions_json, final_pretty_trials_json
424
  except Exception as e:
425
  return {"error": "Failed to generate valid JSON. " + str(e)}
426
 
 
677
  print("NEXT SCHEMA INPUTS")
678
  interactions_inputs = data[0]['data']['group_5']
679
  print("INTERACTIONS INPUTS" + str(interactions_inputs))
680
+ processed_data["input_text_pieces"]["interaction_data_input"] = interactions_inputs.get('interaction_data_input', {}).get('value', None)
681
+ processed_data["input_text_pieces"]["person_data_input"] = interactions_inputs.get('person_data_input', {}).get('value', None)
 
682
 
683
  print("NEXT SCHEMA INPUTS 2")
684
  trials_inputs = data[0]['data']['group_6']
685
  print("TRIALS INPUTS" + str(trials_inputs))
686
+ processed_data["input_text_pieces"]["trial_data_input"] = trials_inputs.get('trial_data_input', {}).get('value', None)
687
+ processed_data["input_text_pieces"]["treatment_data_input"] = trials_inputs.get('treatment_data_input', {}).get('value', None)
 
688
 
689
 
690
  elif processed_data["stepwise_json_creation"][0] == "singlejsoncreation":
 
699
  processed_data["input_text_pieces"]["soil_data_input"] = "EMPTY"
700
  processed_data["input_text_pieces"]["yield_data_input"] = "EMPTY"
701
 
702
+ processed_data["input_text_pieces"]["interaction_data_input"] = "EMPTY"
703
+ processed_data["input_text_pieces"]["person_data_input"] = "EMPTY"
 
704
 
705
+ processed_data["input_text_pieces"]["trial_data_input"] = "EMPTY"
706
+ processed_data["input_text_pieces"]["treatment_data_input"] = "EMPTY"
 
707
 
708
  print("RETURNING DATA")
709
  print(processed_data)