Spaces:

our-sci
/

data-translation-experiments

Sleeping

App Files Files Community

rosemariafontana committed on Nov 13, 2024

Commit

d1c6aca

verified ·

1 Parent(s): 9fe6dfb

Update process_data.py

Browse files

Files changed (1) hide show

process_data.py +52 -23

process_data.py CHANGED Viewed

@@ -145,13 +145,30 @@ def generate_json_pieces(input_data, parameters):
         logs_data_input = input_data["input_text_pieces"]["pre_processed_pieces"]["logs_data_input"]
         soil_data_input = input_data["input_text_pieces"]["pre_processed_pieces"]["soil_data_input"]
         yield_data_input = input_data["input_text_pieces"]["pre_processed_pieces"]["yield_data_input"]
     try:
         # Call OpenAI API to generate structured output based on prompt
         field_response = client.beta.chat.completions.parse(
             model=model_version,  # Use GPT model that supports structured output
             messages=[
-                {"role": "system", "content": "Extract the field information."},
                 {"role": "user", "content": field_data_specification}
             ],
             response_format=FarmActivitiesLite,
@@ -160,7 +177,7 @@ def generate_json_pieces(input_data, parameters):
         plant_response = client.beta.chat.completions.parse(
             model=model_version,  # Use GPT model that supports structured output
             messages=[
-                {"role": "system", "content": "Extract the planting information."},
                 {"role": "user", "content": planting_data_specification}
             ],
             response_format=PlantingLite,
@@ -169,7 +186,7 @@ def generate_json_pieces(input_data, parameters):
         log_response = client.beta.chat.completions.parse(
             model=model_version,  # Use GPT model that supports structured output
             messages=[
-                {"role": "system", "content": "Extract the log information."},
                 {"role": "user", "content": logs_data_specification}
             ],
             response_format=Log,
@@ -178,7 +195,7 @@ def generate_json_pieces(input_data, parameters):
         soil_response = client.beta.chat.completions.parse(
             model=model_version,  # Use GPT model that supports structured output
             messages=[
-                {"role": "system", "content": "Extract the soil information."},
                 {"role": "user", "content": soil_data_specification}
             ],
             response_format=Soil,
@@ -187,7 +204,7 @@ def generate_json_pieces(input_data, parameters):
         yield_response = client.beta.chat.completions.parse(
             model=model_version,  # Use GPT model that supports structured output
             messages=[
-                {"role": "system", "content": "Extract the yield information."},
                 {"role": "user", "content": yield_data_specification}
             ],
             response_format=Yield,
@@ -231,26 +248,32 @@ def pre_processing(input_data, parameters):
         input_data["input_text"] = (str) input text
     """
     if parameters["stepwise_json_creation"] == "stepwisejsoncreation":
-        pre_processing_prompts = [parameters["context_pre_prompt"], parameters["summary_pre_prompt"], parameters["conversation_pre_prompt"], parameters["example_pre_prompt"]]
-        one_giant_preprompt = ""
-        input_data["input_text_pieces"]["pre_processed_pieces"] = {}
-        for pre_prompt in pre_processing_prompts:
-            if pre_prompt:
-                one_giant_preprompt += pre_prompt
-        for text_label, text_body in input_data["input_text_pieces"].items():
-            response = client.chat.completions.create(
-                            model=parameters["model_version"],
-                            messages=[
-                                {"role": "system", "content": one_giant_preprompt},
-                                {"role": "user", "content": text_body}
-                            ]
-                        )
-            response_text = response.choices[0].message.content
-            input_data["input_text_pieces"]["pre_processed_pieces"][text_label] = response_text
     if parameters["stepwise_json_creation"] == "singlejsoncreation":
@@ -418,13 +441,16 @@ def parse_survey_stack_data(data):
         processed_data
             processed_data["input_text"] = (str) the raw input text
     """
     processed_data = {}
     farm_management_inputs = data[0]['data']['group_4']
     processed_data["stepwise_json_creation"] = data[0]['data']['stepwisejsoncreation']['value']
     if processed_data["stepwise_json_creation"] == "stepwisejsoncreation":
         processed_data["input_text_pieces"] = {}
         processed_data["input_text_pieces"]["field_data_input"] = farm_management_inputs.get('field_data_input', {}).get('value', None)
         processed_data["input_text_pieces"]["planting_data_input"] = farm_management_inputs.get('planting_data_input', {}).get('value', None)
@@ -433,6 +459,7 @@ def parse_survey_stack_data(data):
         processed_data["input_text_pieces"]["yield_data_input"] = farm_management_inputs.get('yield_data_input', {}).get('value', None)
         processed_data["input_text"] = "EMPTY"
     elif processed_data["stepwise_json_creation"] == "singlejsoncreation":
         processed_data["input_text"] = data[0]['data']['onelonginputtext']['value']
         processed_data["input_text_pieces"] = {}
         processed_data["input_text_pieces"]["field_data_input"] = "EMPTY"
@@ -441,5 +468,7 @@ def parse_survey_stack_data(data):
         processed_data["input_text_pieces"]["soil_data_input"] = "EMPTY"
         processed_data["input_text_pieces"]["yield_data_input"] = "EMPTY"
     return processed_data

         logs_data_input = input_data["input_text_pieces"]["pre_processed_pieces"]["logs_data_input"]
         soil_data_input = input_data["input_text_pieces"]["pre_processed_pieces"]["soil_data_input"]
         yield_data_input = input_data["input_text_pieces"]["pre_processed_pieces"]["yield_data_input"]
+    field_prompt = "Extract the field information."
+    plant_prompt = "Extract the planting information."
+    log_prompt = "Extract the log information."
+    soil_prompt = "Extract the soil information."
+    yield_prompt = "Extract the yield information."
+    # TODO: fix this part.
+    # Figure out how the prompts should be built when chaining,
+    # pre-prompts, context, etc. are involved.
+    #if not parameters["chaining"] and input_data["input_context"]:
+    #    farm_prompt = input_data["input_text_pieces"]["field_data_input"] + farm_prompt
+    #    farm_prompt = input_data["input_text_pieces"]["field_data_input"] + farm_prompt
+    #    farm_prompt = input_data["input_text_pieces"]["field_data_input"] + farm_prompt
+    #    farm_prompt = input_data["input_text_pieces"]["field_data_input"] + farm_prompt
     try:
         # Call OpenAI API to generate structured output based on prompt
         field_response = client.beta.chat.completions.parse(
             model=model_version,  # Use GPT model that supports structured output
             messages=[
+                {"role": "system", "content": field_prompt},
                 {"role": "user", "content": field_data_specification}
             ],
             response_format=FarmActivitiesLite,
         plant_response = client.beta.chat.completions.parse(
             model=model_version,  # Use GPT model that supports structured output
             messages=[
+                {"role": "system", "content": plant_prompt},
                 {"role": "user", "content": planting_data_specification}
             ],
             response_format=PlantingLite,
         log_response = client.beta.chat.completions.parse(
             model=model_version,  # Use GPT model that supports structured output
             messages=[
+                {"role": "system", "content": log_prompt},
                 {"role": "user", "content": logs_data_specification}
             ],
             response_format=Log,
         soil_response = client.beta.chat.completions.parse(
             model=model_version,  # Use GPT model that supports structured output
             messages=[
+                {"role": "system", "content": soil_prompt},
                 {"role": "user", "content": soil_data_specification}
             ],
             response_format=Soil,
         yield_response = client.beta.chat.completions.parse(
             model=model_version,  # Use GPT model that supports structured output
             messages=[
+                {"role": "system", "content": yield_prompt},
                 {"role": "user", "content": yield_data_specification}
             ],
             response_format=Yield,
         input_data["input_text"] = (str) input text
     """
     if parameters["stepwise_json_creation"] == "stepwisejsoncreation":
+        if parameters["chaining"]:
+            pre_processing_prompts = [parameters["context_pre_prompt"], parameters["summary_pre_prompt"], parameters["conversation_pre_prompt"], parameters["example_pre_prompt"]]
+            one_giant_preprompt = ""
+            input_data["input_text_pieces"]["pre_processed_pieces"] = {}
+            for pre_prompt in pre_processing_prompts:
+                if pre_prompt:
+                    one_giant_preprompt += pre_prompt
+            for text_label, text_body in input_data["input_text_pieces"].items():
+                response = client.chat.completions.create(
+                                model=parameters["model_version"],
+                                messages=[
+                                    {"role": "system", "content": one_giant_preprompt},
+                                    {"role": "user", "content": text_body}
+                                ]
+                            )
+                response_text = response.choices[0].message.content
+                input_data["input_text_pieces"]["pre_processed_pieces"][text_label] = response_text
+        else:
+            input_context = f"You are processing farm activity, interactions, and trial data. Here's important context of the data {parameters['combined_prompt']}. With this context in mind, "
+            input_data["input_context"] = input_context
+            return input_data
     if parameters["stepwise_json_creation"] == "singlejsoncreation":
         processed_data
             processed_data["input_text"] = (str) the raw input text
     """
+    print("PROCESSING SURVEY STACK DATA")
     processed_data = {}
     farm_management_inputs = data[0]['data']['group_4']
     processed_data["stepwise_json_creation"] = data[0]['data']['stepwisejsoncreation']['value']
+    print("STEPWISE?: " + str(processed_data["stepwise_json_creation"]))
     if processed_data["stepwise_json_creation"] == "stepwisejsoncreation":
+        print("IN THE STEP")
         processed_data["input_text_pieces"] = {}
         processed_data["input_text_pieces"]["field_data_input"] = farm_management_inputs.get('field_data_input', {}).get('value', None)
         processed_data["input_text_pieces"]["planting_data_input"] = farm_management_inputs.get('planting_data_input', {}).get('value', None)
         processed_data["input_text_pieces"]["yield_data_input"] = farm_management_inputs.get('yield_data_input', {}).get('value', None)
         processed_data["input_text"] = "EMPTY"
     elif processed_data["stepwise_json_creation"] == "singlejsoncreation":
+        print("IN THE SINGLE")
         processed_data["input_text"] = data[0]['data']['onelonginputtext']['value']
         processed_data["input_text_pieces"] = {}
         processed_data["input_text_pieces"]["field_data_input"] = "EMPTY"
         processed_data["input_text_pieces"]["soil_data_input"] = "EMPTY"
         processed_data["input_text_pieces"]["yield_data_input"] = "EMPTY"
+    print("RETURNING DATA")
+    print(processed_data)
     return processed_data