Spaces:

our-sci
/

data-translation-experiments

Sleeping

App Files Files Community

rosemariafontana committed on Nov 15, 2024

Commit

ca500b1

verified ·

1 Parent(s): 4ef58c6

Update process_data.py

Browse files

Files changed (1) hide show

process_data.py +140 -18

process_data.py CHANGED Viewed

@@ -151,6 +151,12 @@ def generate_json_pieces(input_data, parameters):
         log_data_input = input_data["input_text_pieces"]["pre_processed_pieces"]["log_data_input"]
         soil_data_input = input_data["input_text_pieces"]["pre_processed_pieces"]["soil_data_input"]
         yield_data_input = input_data["input_text_pieces"]["pre_processed_pieces"]["yield_data_input"]
     else:
         print("Pre prompt is false")
         field_data_input = input_data["input_text_pieces"]["field_data_input"]
@@ -159,7 +165,14 @@ def generate_json_pieces(input_data, parameters):
         soil_data_input = input_data["input_text_pieces"]["soil_data_input"]
         yield_data_input = input_data["input_text_pieces"]["yield_data_input"]
     print("Setting prompts")
     field_prompt = "Extract the field information."
     plant_prompt = "Extract the planting information."
@@ -167,11 +180,13 @@ def generate_json_pieces(input_data, parameters):
     soil_prompt = "Extract the soil information."
     yield_prompt = "Extract the yield information."
-    # fix this part
-    # figure out what happens when there's
-    # chaining, pre-prompts, context, etc ....
     if parameters["combined_pre_prompt"]:
         field_prompt = parameters["combined_pre_prompt"] + field_prompt
         plant_prompt = parameters["combined_pre_prompt"] + plant_prompt
@@ -179,6 +194,12 @@ def generate_json_pieces(input_data, parameters):
         soil_prompt = parameters["combined_pre_prompt"] + soil_prompt
         yield_prompt = parameters["combined_pre_prompt"] + yield_prompt
     try:
         # Call OpenAI API to generate structured output based on prompt
         print("Getting all responses in pieces, starting with field response")
@@ -292,9 +313,114 @@ def generate_json_pieces(input_data, parameters):
         # This is for the second schema now, interactions
-        return final_pretty_farm_activity_json, final_pretty_json, final_pretty_json
     except Exception as e:
         return {"error": "Failed to generate valid JSON. " + str(e)}
@@ -551,16 +677,14 @@ def parse_survey_stack_data(data):
         print("NEXT SCHEMA INPUTS")
         interactions_inputs = data[0]['data']['group_5']
         print("INTERACTIONS INPUTS" + str(interactions_inputs))
-        processed_data["input_text_pieces_second_schema"] = {}
-        processed_data["input_text_pieces_second_schema"]["interaction_data_input"] = interactions_inputs.get('interaction_data_input', {}).get('value', None)
-        processed_data["input_text_pieces_second_schema"]["person_data_input"] = interactions_inputs.get('person_data_input', {}).get('value', None)
         print("NEXT SCHEMA INPUTS 2")
         trials_inputs = data[0]['data']['group_6']
         print("TRIALS INPUTS" + str(trials_inputs))
-        processed_data["input_text_pieces_third_schema"] = {}
-        processed_data["input_text_pieces_third_schema"]["trial_data_input"] = trials_inputs.get('trial_data_input', {}).get('value', None)
-        processed_data["input_text_pieces_third_schema"]["treatment_data_input"] = trials_inputs.get('treatment_data_input', {}).get('value', None)
     elif processed_data["stepwise_json_creation"][0] == "singlejsoncreation":
@@ -575,13 +699,11 @@ def parse_survey_stack_data(data):
         processed_data["input_text_pieces"]["soil_data_input"] = "EMPTY"
         processed_data["input_text_pieces"]["yield_data_input"] = "EMPTY"
-        processed_data["input_text_pieces_second_schema"] = {}
-        processed_data["input_text_pieces_second_schema"]["interaction_data_input"] = "EMPTY"
-        processed_data["input_text_pieces_second_schema"]["person_data_input"] = "EMPTY"
-        processed_data["input_text_pieces_third_schema"] = {}
-        processed_data["input_text_pieces_third_schema"]["trial_data_input"] = "EMPTY"
-        processed_data["input_text_pieces_third_schema"]["treatment_data_input"] = "EMPTY"
     print("RETURNING DATA")
     print(processed_data)

         log_data_input = input_data["input_text_pieces"]["pre_processed_pieces"]["log_data_input"]
         soil_data_input = input_data["input_text_pieces"]["pre_processed_pieces"]["soil_data_input"]
         yield_data_input = input_data["input_text_pieces"]["pre_processed_pieces"]["yield_data_input"]
+        interaction_data_input = input_data["input_text_pieces"]["pre_processed_pieces"]["interaction_data_input"]
+        person_data_input = input_data["input_text_pieces"]["pre_processed_pieces"]["person_data_input"]
+        trial_data_input = input_data["input_text_pieces"]["pre_processed_pieces"]["trial_data_input"]
+        treatment_data_input = input_data["input_text_pieces"]["pre_processed_pieces"]["treatment_data_input"]
     else:
         print("Pre prompt is false")
         field_data_input = input_data["input_text_pieces"]["field_data_input"]
         soil_data_input = input_data["input_text_pieces"]["soil_data_input"]
         yield_data_input = input_data["input_text_pieces"]["yield_data_input"]
+        interaction_data_input = input_data["input_text_pieces"]["interaction_data_input"]
+        person_data_input = input_data["input_text_pieces"]["person_data_input"]
+        trial_data_input = input_data["input_text_pieces"]["trial_data_input"]
+        treatment_data_input = input_data["input_text_pieces"]["treatment_data_input"]
+    # Fix these prompts for all
     print("Setting prompts")
     field_prompt = "Extract the field information."
     plant_prompt = "Extract the planting information."
     soil_prompt = "Extract the soil information."
     yield_prompt = "Extract the yield information."
+    interaction_prompt = "Extract the interaction information"
+    person_prompt = "Please provide a list of people involved in this interaction, with each person's name, role, and any other relevant details."
+    trial_prompt = "Extract the trial information"
+    treatment_prompt = "Please provide a list of different treatments (strips or blocks with the same conditions applied) performed by the partner."
     if parameters["combined_pre_prompt"]:
         field_prompt = parameters["combined_pre_prompt"] + field_prompt
         plant_prompt = parameters["combined_pre_prompt"] + plant_prompt
         soil_prompt = parameters["combined_pre_prompt"] + soil_prompt
         yield_prompt = parameters["combined_pre_prompt"] + yield_prompt
+        interaction_prompt = parameters["combined_pre_prompt"] + interaction_prompt
+        person_prompt = parameters["combined_pre_prompt"] + person_prompt
+        trial_prompt = parameters["combined_pre_prompt"] + trial_prompt
+        treatment_prompt = parameters["combined_pre_prompt"] + treatment_prompt
     try:
         # Call OpenAI API to generate structured output based on prompt
         print("Getting all responses in pieces, starting with field response")
         # This is for the second schema now, interactions
+        print("Interaction prompt")
+        print(interaction_prompt)
+        print("Interaction data input")
+        print(interaction_data_input)
+        interaction_response = client.beta.chat.completions.parse(
+            model=model_version,  # Use GPT model that supports structured output
+            messages=[
+                {"role": "system", "content": interaction_prompt},
+                {"role": "user", "content": interaction_data_input}
+            ],
+            response_format=InteractionsLite,
+        )
+        interaction_generated_json = interaction_response.choices[0].message.parsed
+        print("INTERACTION JSON: ")
+        interaction_pretty_json = interaction_generated_json.dict()
+        print(interaction_pretty_json) # debugging
+        print("Person prompt")
+        print(person_prompt)
+        print("Person data input")
+        print(person_data_input)
+        interaction_response = client.beta.chat.completions.parse(
+            model=model_version,  # Use GPT model that supports structured output
+            messages=[
+                {"role": "system", "content": person_prompt},
+                {"role": "user", "content": person_data_input}
+            ],
+            response_format=Person,
+        )
+        person_generated_json = person_response.choices[0].message.parsed
+        print("PERSON JSON: ")
+        person_pretty_json = person_generated_json.dict()
+        print(person_pretty_json) # debugging
+        interactions = {
+            **interaction_pretty_json,
+            "people": person_generated_json
+        }
+        print("ADDED DICTS 2")
+        print(interactions)
+        print("FINAL JSON: ")
+        final_pretty_interactions_json = json.dumps(interactions, indent=4)
+        print(final_pretty_interactions_json)
+        # This is for the third schema now, trials
+        print("Trial prompt")
+        print(trial_prompt)
+        print("Trial data input")
+        print(trial_data_input)
+        trial_response = client.beta.chat.completions.parse(
+            model=model_version,  # Use GPT model that supports structured output
+            messages=[
+                {"role": "system", "content": trial_prompt},
+                {"role": "user", "content": trial_data_input}
+            ],
+            response_format=TrialLite,
+        )
+        trial_generated_json = trial_response.choices[0].message.parsed
+        print("TRIAL JSON: ")
+        trial_pretty_json = trial_generated_json.dict()
+        print(trial_pretty_json) # debugging
+        print("Treatment prompt")
+        print(treatment_prompt)
+        print("Treatment data input")
+        print(treatment_data_input)
+        treatment_response = client.beta.chat.completions.parse(
+            model=model_version,  # Use GPT model that supports structured output
+            messages=[
+                {"role": "system", "content": treatment_prompt},
+                {"role": "user", "content": treatment_data_input}
+            ],
+            response_format=Treatment,
+        )
+        treatment_generated_json = treatment_response.choices[0].message.parsed
+        print("TREATMENT JSON: ")
+        treatment_pretty_json = treatment_generated_json.dict()
+        print(treatment_pretty_json) # debugging
+        trials = {
+            **trial_pretty_json,
+            "treatments": treatment_generated_json
+        }
+        print("ADDED DICTS 3")
+        print(trials)
+        print("TREATMENT JSON: ")
+        final_pretty_trials_json = json.dumps(trials, indent=4)
+        print(final_pretty_trials_json)
+        return final_pretty_farm_activity_json, final_pretty_interactions_json, final_pretty_trials_json
     except Exception as e:
         return {"error": "Failed to generate valid JSON. " + str(e)}
         print("NEXT SCHEMA INPUTS")
         interactions_inputs = data[0]['data']['group_5']
         print("INTERACTIONS INPUTS" + str(interactions_inputs))
+        processed_data["input_text_pieces"]["interaction_data_input"] = interactions_inputs.get('interaction_data_input', {}).get('value', None)
+        processed_data["input_text_pieces"]["person_data_input"] = interactions_inputs.get('person_data_input', {}).get('value', None)
         print("NEXT SCHEMA INPUTS 2")
         trials_inputs = data[0]['data']['group_6']
         print("TRIALS INPUTS" + str(trials_inputs))
+        processed_data["input_text_pieces"]["trial_data_input"] = trials_inputs.get('trial_data_input', {}).get('value', None)
+        processed_data["input_text_pieces"]["treatment_data_input"] = trials_inputs.get('treatment_data_input', {}).get('value', None)
     elif processed_data["stepwise_json_creation"][0] == "singlejsoncreation":
         processed_data["input_text_pieces"]["soil_data_input"] = "EMPTY"
         processed_data["input_text_pieces"]["yield_data_input"] = "EMPTY"
+        processed_data["input_text_pieces"]["interaction_data_input"] = "EMPTY"
+        processed_data["input_text_pieces"]["person_data_input"] = "EMPTY"
+        processed_data["input_text_pieces"]["trial_data_input"] = "EMPTY"
+        processed_data["input_text_pieces"]["treatment_data_input"] = "EMPTY"
     print("RETURNING DATA")
     print(processed_data)