rosemariafontana committed on
Commit
443fc96
·
verified ·
1 Parent(s): ee54d53

Update process_data.py

Browse files
Files changed (1) hide show
  1. process_data.py +14 -21
process_data.py CHANGED
@@ -118,7 +118,7 @@ def generate_json_pieces(input_data, parameters):
118
  input_data: (dict) The input data, preprocessed, from the user. Aka what will fill in the JSON
119
  input_data["input_text"] = (str) the preprocessed input text
120
  input_data["input_context"] = (str) depending on levers, empty or what is put in front of the prompt
121
- input_data["input_text_pieces"] = (dict) containing the individual split up prompt pieces: field_data_input, planting_data_input, logs_data_input, soil_data_input, yield_data_input
122
 
123
  parameters: (dict) All of the individual parameters and "flippers"
124
  parameters["model_version"] = (str) what model should be used
@@ -142,14 +142,14 @@ def generate_json_pieces(input_data, parameters):
142
  print("Pre prompt is true")
143
  field_data_input = input_data["input_text_pieces"]["pre_processed_pieces"]["field_data_input"]
144
  planting_data_input = input_data["input_text_pieces"]["pre_processed_pieces"]["planting_data_input"]
145
- logs_data_input = input_data["input_text_pieces"]["pre_processed_pieces"]["logs_data_input"]
146
  soil_data_input = input_data["input_text_pieces"]["pre_processed_pieces"]["soil_data_input"]
147
  yield_data_input = input_data["input_text_pieces"]["pre_processed_pieces"]["yield_data_input"]
148
  else:
149
  print("Pre prompt is false")
150
  field_data_input = input_data["input_text_pieces"]["field_data_input"]
151
  planting_data_input = input_data["input_text_pieces"]["planting_data_input"]
152
- logs_data_input = input_data["input_text_pieces"]["logs_data_input"]
153
  soil_data_input = input_data["input_text_pieces"]["soil_data_input"]
154
  yield_data_input = input_data["input_text_pieces"]["yield_data_input"]
155
 
@@ -166,8 +166,8 @@ def generate_json_pieces(input_data, parameters):
166
  # figure out what happens when there's
167
  # chaining, pre-prompts, context, etc ....
168
 
169
- #if not parameters["chaining"] and input_data["input_context"]:
170
- # farm_prompt = input_data["input_text_pieces"]["field_data_input"] + farm_prompt
171
  # farm_prompt = input_data["input_text_pieces"]["field_data_input"] + farm_prompt
172
  # farm_prompt = input_data["input_text_pieces"]["field_data_input"] + farm_prompt
173
  # farm_prompt = input_data["input_text_pieces"]["field_data_input"] + farm_prompt
@@ -179,7 +179,7 @@ def generate_json_pieces(input_data, parameters):
179
  model=model_version, # Use GPT model that supports structured output
180
  messages=[
181
  {"role": "system", "content": field_prompt},
182
- {"role": "user", "content": field_data_specification}
183
  ],
184
  response_format=FarmActivitiesLite,
185
  )
@@ -188,7 +188,7 @@ def generate_json_pieces(input_data, parameters):
188
  model=model_version, # Use GPT model that supports structured output
189
  messages=[
190
  {"role": "system", "content": plant_prompt},
191
- {"role": "user", "content": planting_data_specification}
192
  ],
193
  response_format=PlantingLite,
194
  )
@@ -197,7 +197,7 @@ def generate_json_pieces(input_data, parameters):
197
  model=model_version, # Use GPT model that supports structured output
198
  messages=[
199
  {"role": "system", "content": log_prompt},
200
- {"role": "user", "content": logs_data_specification}
201
  ],
202
  response_format=Log,
203
  )
@@ -206,7 +206,7 @@ def generate_json_pieces(input_data, parameters):
206
  model=model_version, # Use GPT model that supports structured output
207
  messages=[
208
  {"role": "system", "content": soil_prompt},
209
- {"role": "user", "content": soil_data_specification}
210
  ],
211
  response_format=Soil,
212
  )
@@ -215,7 +215,7 @@ def generate_json_pieces(input_data, parameters):
215
  model=model_version, # Use GPT model that supports structured output
216
  messages=[
217
  {"role": "system", "content": yield_prompt},
218
- {"role": "user", "content": yield_data_specification}
219
  ],
220
  response_format=Yield,
221
  )
@@ -244,7 +244,7 @@ def pre_processing(input_data, parameters):
244
  input_data: (dict) The input data, preprocessed, from the user. Aka what will fill in the JSON
245
  input_data["input_text"] = (str) the preprocessed input text
246
  input_data["input_context"] = (str) depending on levers, empty or what is put in front of the prompt
247
- input_data["input_text_pieces"] = (dict) containing the individual split up prompt pieces: field_data_input, planting_data_input, logs_data_input, soil_data_input, yield_data_input
248
 
249
  parameters: (dict) All of the individual parameters and "flippers"
250
  parameters["model_version"] = (str) what model should be used
@@ -261,14 +261,6 @@ def pre_processing(input_data, parameters):
261
  if input_data["stepwise_json_creation"] == "stepwisejsoncreation":
262
 
263
  if parameters["chaining"]:
264
- pre_processing_prompts = [parameters["context_pre_prompt"], parameters["summary_pre_prompt"], parameters["conversation_pre_prompt"], parameters["example_pre_prompt"]]
265
- one_giant_preprompt = ""
266
- input_data["input_text_pieces"]["pre_processed_pieces"] = {}
267
-
268
- for pre_prompt in pre_processing_prompts:
269
- if pre_prompt:
270
- one_giant_preprompt += pre_prompt
271
-
272
  for text_label, text_body in input_data["input_text_pieces"].items():
273
 
274
  response = client.chat.completions.create(
@@ -282,7 +274,8 @@ def pre_processing(input_data, parameters):
282
  response_text = response.choices[0].message.content
283
  input_data["input_text_pieces"]["pre_processed_pieces"][text_label] = response_text
284
  else:
285
- input_context = f"You are processing farm activity, interactions, and trial data. Here's important context of the data {parameters['combined_prompt']}. With this context in mind, "
 
286
  input_data["input_context"] = input_context
287
  return input_data
288
 
@@ -342,7 +335,7 @@ def process_specifications(input_data, parameters):
342
  input_data: (dict) The input data, preprocessed, from the user. Aka what will fill in the JSON
343
  input_data["input_text"] = (str) the preprocessed input text
344
  input_data["input_context"] = (str) depending on levers, empty or what is put in front of the prompt
345
- input_data["input_text_pieces"] = (dict) containing the individual split up prompt pieces: field_data_input, planting_data_input, logs_data_input, soil_data_input, yield_data_input
346
 
347
  parameters: (dict) All of the individual parameters and "flippers"
348
  parameters["pre_prompt"] = (bool) whether or not there is a pre-prompt to process through pre_processing()
 
118
  input_data: (dict) The input data, preprocessed, from the user. Aka what will fill in the JSON
119
  input_data["input_text"] = (str) the preprocessed input text
120
  input_data["input_context"] = (str) depending on levers, empty or what is put in front of the prompt
121
+ input_data["input_text_pieces"] = (dict) containing the individual split up prompt pieces: field_data_input, planting_data_input, log_data_input, soil_data_input, yield_data_input
122
 
123
  parameters: (dict) All of the individual parameters and "flippers"
124
  parameters["model_version"] = (str) what model should be used
 
142
  print("Pre prompt is true")
143
  field_data_input = input_data["input_text_pieces"]["pre_processed_pieces"]["field_data_input"]
144
  planting_data_input = input_data["input_text_pieces"]["pre_processed_pieces"]["planting_data_input"]
145
+ log_data_input = input_data["input_text_pieces"]["pre_processed_pieces"]["log_data_input"]
146
  soil_data_input = input_data["input_text_pieces"]["pre_processed_pieces"]["soil_data_input"]
147
  yield_data_input = input_data["input_text_pieces"]["pre_processed_pieces"]["yield_data_input"]
148
  else:
149
  print("Pre prompt is false")
150
  field_data_input = input_data["input_text_pieces"]["field_data_input"]
151
  planting_data_input = input_data["input_text_pieces"]["planting_data_input"]
152
+ log_data_input = input_data["input_text_pieces"]["log_data_input"]
153
  soil_data_input = input_data["input_text_pieces"]["soil_data_input"]
154
  yield_data_input = input_data["input_text_pieces"]["yield_data_input"]
155
 
 
166
  # figure out what happens when there's
167
  # chaining, pre-prompts, context, etc ....
168
 
169
+ if input_data["input_context"]:
170
+ field_prompt = input_data["input_text_pieces"]["field_data_input"] + field_prompt
171
  # farm_prompt = input_data["input_text_pieces"]["field_data_input"] + farm_prompt
172
  # farm_prompt = input_data["input_text_pieces"]["field_data_input"] + farm_prompt
173
  # farm_prompt = input_data["input_text_pieces"]["field_data_input"] + farm_prompt
 
179
  model=model_version, # Use GPT model that supports structured output
180
  messages=[
181
  {"role": "system", "content": field_prompt},
182
+ {"role": "user", "content": field_data_input}
183
  ],
184
  response_format=FarmActivitiesLite,
185
  )
 
188
  model=model_version, # Use GPT model that supports structured output
189
  messages=[
190
  {"role": "system", "content": plant_prompt},
191
+ {"role": "user", "content": planting_data_input}
192
  ],
193
  response_format=PlantingLite,
194
  )
 
197
  model=model_version, # Use GPT model that supports structured output
198
  messages=[
199
  {"role": "system", "content": log_prompt},
200
+ {"role": "user", "content": log_data_input}
201
  ],
202
  response_format=Log,
203
  )
 
206
  model=model_version, # Use GPT model that supports structured output
207
  messages=[
208
  {"role": "system", "content": soil_prompt},
209
+ {"role": "user", "content": soil_data_input}
210
  ],
211
  response_format=Soil,
212
  )
 
215
  model=model_version, # Use GPT model that supports structured output
216
  messages=[
217
  {"role": "system", "content": yield_prompt},
218
+ {"role": "user", "content": yield_data_input}
219
  ],
220
  response_format=Yield,
221
  )
 
244
  input_data: (dict) The input data, preprocessed, from the user. Aka what will fill in the JSON
245
  input_data["input_text"] = (str) the preprocessed input text
246
  input_data["input_context"] = (str) depending on levers, empty or what is put in front of the prompt
247
+ input_data["input_text_pieces"] = (dict) containing the individual split up prompt pieces: field_data_input, planting_data_input, log_data_input, soil_data_input, yield_data_input
248
 
249
  parameters: (dict) All of the individual parameters and "flippers"
250
  parameters["model_version"] = (str) what model should be used
 
261
  if input_data["stepwise_json_creation"] == "stepwisejsoncreation":
262
 
263
  if parameters["chaining"]:
 
 
 
 
 
 
 
 
264
  for text_label, text_body in input_data["input_text_pieces"].items():
265
 
266
  response = client.chat.completions.create(
 
274
  response_text = response.choices[0].message.content
275
  input_data["input_text_pieces"]["pre_processed_pieces"][text_label] = response_text
276
  else:
277
+ # Add in here the other schemas (interactions and trials)
278
+ input_context = f"You are processing farm activity data. Here's important context of the data {parameters['combined_prompt']}. With this context in mind, "
279
  input_data["input_context"] = input_context
280
  return input_data
281
 
 
335
  input_data: (dict) The input data, preprocessed, from the user. Aka what will fill in the JSON
336
  input_data["input_text"] = (str) the preprocessed input text
337
  input_data["input_context"] = (str) depending on levers, empty or what is put in front of the prompt
338
+ input_data["input_text_pieces"] = (dict) containing the individual split up prompt pieces: field_data_input, planting_data_input, log_data_input, soil_data_input, yield_data_input
339
 
340
  parameters: (dict) All of the individual parameters and "flippers"
341
  parameters["pre_prompt"] = (bool) whether or not there is a pre-prompt to process through pre_processing()