Spaces:

our-sci
/

data-translation-experiments

Sleeping

App Files Files Community

rosemariafontana committed on Oct 30, 2024

Commit

b3c2c53

verified ·

1 Parent(s): 2a00c62

Update process_data.py

Browse files

Files changed (1) hide show

process_data.py +90 -12

process_data.py CHANGED Viewed

@@ -20,13 +20,25 @@ def generate_json(input_data, model_version):
     Function to prompt OpenAI API to generate structured JSON output.
     """
     try:
         #Call OpenAI API to generate structured output based on prompt
         farm_info_response = client.beta.chat.completions.parse(
             model=model_version,  # Use GPT model that supports structured output
             messages=[
-                {"role": "system", "content": "Extract the farm information."},
-                {"role": "user", "content": input_data}
             ],
             response_format=FarmActivities,
         )
@@ -35,16 +47,18 @@ def generate_json(input_data, model_version):
             raise ValueError(f"API error: {interactions_response['error']['message']}")
         farm_generated_json = farm_info_response.choices[0].message.parsed
         print("FARM JSON: ")
         print(farm_generated_json) # debugging
         farm_pretty_json = farm_generated_json.json()
         interactions_response = client.beta.chat.completions.parse(
             model=model_version,  # Use GPT model that supports structured output
             messages=[
-                {"role": "system", "content": "Extract the interactions information."},
-                {"role": "user", "content": specification}
             ],
             response_format=Interactions,
         )
@@ -56,15 +70,14 @@ def generate_json(input_data, model_version):
         print("INTERACTIONS JSON: ")
         print(interactions_generated_json) # debugging 2
         interactions_pretty_json = interactions_generated_json.json()
         trial_response = client.beta.chat.completions.parse(
             model=model_version,  # Use GPT model that supports structured output
             messages=[
-                {"role": "system", "content": "Extract the trial information."},
-                {"role": "user", "content": specification}
             ],
             response_format=Trial,
         )
@@ -126,7 +139,7 @@ def generate_json_pieces(specification, model_version, additional_json_creation_
         log_response = client.beta.chat.completions.parse(
             model=model_version,  # Use GPT model that supports structured output
             messages=[
-                {"role": "system", "content": "Extract the planting information."},
                 {"role": "user", "content": logs_data_specification}
             ],
             response_format=Log,
@@ -135,7 +148,7 @@ def generate_json_pieces(specification, model_version, additional_json_creation_
         soil_response = client.beta.chat.completions.parse(
             model=model_version,  # Use GPT model that supports structured output
             messages=[
-                {"role": "system", "content": "Extract the planting information."},
                 {"role": "user", "content": soil_data_specification}
             ],
             response_format=Soil,
@@ -144,7 +157,7 @@ def generate_json_pieces(specification, model_version, additional_json_creation_
         yield_response = client.beta.chat.completions.parse(
             model=model_version,  # Use GPT model that supports structured output
             messages=[
-                {"role": "system", "content": "Extract the planting information."},
                 {"role": "user", "content": yield_data_specification}
             ],
             response_format=Yield,
@@ -186,15 +199,80 @@ def generate_json_pieces(specification, model_version, additional_json_creation_
 #    return output1, output2, output3
 def process_specifications(input_data, parameters):
     # here is where parsing and other things will happen before
-    return generate_json(input_data["input_text"], parameters["model_version"])
 def parse_survey_stack_parameters(data):
     processed_data = {}
     processed_data["model_version"] = data[0]['data']['modelversion']['value'][0]
     return processed_data

     Function to prompt OpenAI API to generate structured JSON output.
     """
+    input_text = input_data["input_text"]
+    farm_prompt = "Extract the farm information."
+    interactions_prompt = "Extract the interactions information."
+    trial_prompt = "Extract the trial information."
+    if input_data["input_context"]:
+        farm_prompt = input_data["input_context"] + farm_prompt
+        interactions_prompt = input_data["input_context"] + interactions_prompt
+        trial_prompt = input_data["input_context"] + trial_prompt
     try:
         #Call OpenAI API to generate structured output based on prompt
         farm_info_response = client.beta.chat.completions.parse(
             model=model_version,  # Use GPT model that supports structured output
             messages=[
+                {"role": "system", "content": farm_prompt},
+                {"role": "user", "content": input_text}
             ],
             response_format=FarmActivities,
         )
             raise ValueError(f"API error: {interactions_response['error']['message']}")
         farm_generated_json = farm_info_response.choices[0].message.parsed
         print("FARM JSON: ")
         print(farm_generated_json) # debugging
         farm_pretty_json = farm_generated_json.json()
         interactions_response = client.beta.chat.completions.parse(
             model=model_version,  # Use GPT model that supports structured output
             messages=[
+                {"role": "system", "content": interactions_prompt},
+                {"role": "user", "content": input_text}
             ],
             response_format=Interactions,
         )
         print("INTERACTIONS JSON: ")
         print(interactions_generated_json) # debugging 2
         interactions_pretty_json = interactions_generated_json.json()
         trial_response = client.beta.chat.completions.parse(
             model=model_version,  # Use GPT model that supports structured output
             messages=[
+                {"role": "system", "content": trial_prompt},
+                {"role": "user", "content": input_text}
             ],
             response_format=Trial,
         )
         log_response = client.beta.chat.completions.parse(
             model=model_version,  # Use GPT model that supports structured output
             messages=[
+                {"role": "system", "content": "Extract the log information."},
                 {"role": "user", "content": logs_data_specification}
             ],
             response_format=Log,
         soil_response = client.beta.chat.completions.parse(
             model=model_version,  # Use GPT model that supports structured output
             messages=[
+                {"role": "system", "content": "Extract the soil information."},
                 {"role": "user", "content": soil_data_specification}
             ],
             response_format=Soil,
         yield_response = client.beta.chat.completions.parse(
             model=model_version,  # Use GPT model that supports structured output
             messages=[
+                {"role": "system", "content": "Extract the yield information."},
                 {"role": "user", "content": yield_data_specification}
             ],
             response_format=Yield,
 #    return output1, output2, output3
def pre_processing(input_data, parameters):
    """Apply the configured pre-prompts to ``input_data`` before extraction.

    Two modes, selected by ``parameters["chaining"]``:

    * chaining: each non-empty pre-prompt is sent to the chat model in turn
      as the system message, with the current input text as the user
      message. When a reply parses as JSON, the parsed value becomes the
      input text for the next step. ``input_context`` is set to ``False``.
    * no chaining: no model call is made; ``parameters["combined_prompt"]``
      is wrapped into a context sentence stored under ``input_context``.

    Args:
        input_data: dict with at least an ``"input_text"`` entry. It is
            modified in place.
        parameters: dict shaped like the result of
            ``parse_survey_stack_parameters``.

    Returns:
        The same ``input_data`` dict, with ``"input_context"`` (and, when
        chaining, ``"input_text"``) updated.
    """
    if not parameters["chaining"]:
        # Pulled into a local so the f-string below has no nested double
        # quotes, which is a syntax error before Python 3.12.
        combined_prompt = parameters["combined_prompt"]
        input_data["input_context"] = (
            "You are processing farm activity, interactions, and trial data. "
            f"Here's important context of the data {combined_prompt}. "
            "With this context in mind, "
        )
        return input_data

    input_text = input_data["input_text"]
    pre_prompts = [
        parameters["context_pre_prompt"],
        parameters["summary_pre_prompt"],
        parameters["conversation_pre_prompt"],
        parameters["example_pre_prompt"],
    ]
    for pre_prompt in pre_prompts:
        # Skip pre-prompts left empty, as the non-chaining path does when it
        # builds the combined prompt.
        if not pre_prompt:
            continue
        response = client.chat.completions.create(
            model=parameters["model_version"],
            messages=[
                {"role": "system", "content": pre_prompt},
                {"role": "user", "content": input_text},
            ],
        )
        # Attribute access, as for the structured-output responses elsewhere
        # in this file; the response is an object, not a dict.
        response_text = response.choices[0].message.content
        try:
            # NOTE(review): a reply that parses to a dict or list is forwarded
            # as-is as the next message content -- confirm that is intended.
            input_text = json.loads(response_text)
        except (TypeError, ValueError) as e:
            # Best effort: keep the previous input text and carry on.
            print("Failed to parse response as JSON. Error was:")
            print(e)

    input_data["input_context"] = False
    input_data["input_text"] = input_text
    return input_data


def process_specifications(input_data, parameters):
    """Run optional pre-processing, then generate the structured JSON.

    Args:
        input_data: dict with at least an ``"input_text"`` entry.
        parameters: dict shaped like the result of
            ``parse_survey_stack_parameters``.

    Returns:
        Whatever ``generate_json`` returns.
    """
    if parameters["pre_prompt"]:
        input_data = pre_processing(input_data, parameters)
    else:
        # generate_json reads both "input_text" and "input_context" from the
        # dict, so pass a dict with the context switched off, not a string.
        input_data = {**input_data, "input_context": False}
    return generate_json(input_data, parameters["model_version"])


def parse_survey_stack_parameters(data):
    """Flatten the submitted parameter payload into a plain dict.

    The result always carries the same keys: ``model_version``,
    ``pre_prompt``, the four ``*_pre_prompt`` texts, ``chaining``,
    ``combined_prompt`` and ``combined_pre_prompt`` (an alias of
    ``combined_prompt``, kept because earlier code wrote that spelling).

    Args:
        data: sequence whose first element has a ``"data"`` mapping of
            ``{field: {"value": ...}}`` entries.

    Returns:
        dict of parsed parameters. ``chaining`` is ``None`` when pre-prompts
        are off, otherwise ``True`` or ``False``. ``combined_prompt`` is a
        string only when pre-prompts are on and chaining is ``"no"``.

    Raises:
        KeyError: if a required field is missing from the payload.
    """
    survey = data[0]['data']
    processed_data = {
        "model_version": survey['modelversion']['value'][0],
        "pre_prompt": False,
        "context_pre_prompt": None,
        "summary_pre_prompt": None,
        "conversation_pre_prompt": None,
        "example_pre_prompt": None,
        "chaining": None,
        "combined_prompt": None,
        "combined_pre_prompt": None,
    }
    if survey['preprompt']['value'][0] != 'continue_preprompts':
        return processed_data

    processed_data["pre_prompt"] = True
    processed_data["context_pre_prompt"] = survey['contextpreprompt']['value']
    processed_data["summary_pre_prompt"] = survey['summarypreprompt']['value']
    processed_data["conversation_pre_prompt"] = survey['conversationpreprompt']['value']
    processed_data["example_pre_prompt"] = survey['examplepreprompt']['value']

    if survey['prepromptchaining']['value'][0] == "no":
        # Without chaining, the non-empty pre-prompts are merged into one
        # context string.
        combined_prompt = " ".join(filter(None, [
            processed_data["context_pre_prompt"],
            processed_data["summary_pre_prompt"],
            processed_data["conversation_pre_prompt"],
            processed_data["example_pre_prompt"],
        ]))
        processed_data["chaining"] = False
        processed_data["combined_prompt"] = combined_prompt
        processed_data["combined_pre_prompt"] = combined_prompt
    else:
        processed_data["chaining"] = True
    return processed_data