Spaces:

our-sci
/

data-translation-experiments

Sleeping

App Files Files Community

rosemariafontana committed on Nov 12, 2024

Commit

915b500

verified ·

1 Parent(s): 9eec8db

Update process_data.py

Browse files

Files changed (1) hide show

process_data.py +114 -6

process_data.py CHANGED Viewed

@@ -9,8 +9,10 @@ import json
 from schema_classes import FarmActivities, Interactions, Trial, FarmActivitiesLite, PlantingLite, Log, Soil, Yield
-# adding comment
-# Chatbot model
 os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")
 client = OpenAI()
@@ -18,6 +20,19 @@ client = OpenAI()
 def generate_json(input_data, parameters):
     """
     Function to prompt OpenAI API to generate structured JSON output.
     """
     input_text = input_data["input_text"]
@@ -33,7 +48,6 @@ def generate_json(input_data, parameters):
         trial_prompt = input_data["input_context"] + trial_prompt
     try:
         #Call OpenAI API to generate structured output based on prompt
@@ -94,7 +108,36 @@ def generate_json(input_data, parameters):
         return {"error": "Failed to generate valid JSON. " + str(e)}
 # This is for the step-wise JSON creation
-def generate_json_pieces(specification, model_version, additional_json_creation_options, field_data_input, planting_data_input, logs_data_input, soil_data_input, yield_data_input):
     if additional_json_creation_options == "Explicit specific pieces":
         field_data_specification = field_data_input
@@ -188,7 +231,27 @@ def generate_json_pieces(specification, model_version, additional_json_creation_
 #    return output1, output2, output3
 def pre_processing(input_data, parameters):
-    # in the event there's a pre-prompt, process
     if parameters["chaining"]:
@@ -238,6 +301,26 @@ def pre_processing(input_data, parameters):
 def process_specifications(input_data, parameters):
     # here is where parsing and other things will happen before
     if parameters["pre_prompt"] == True:
         processed_input = pre_processing(input_data, parameters)
@@ -249,6 +332,21 @@ def process_specifications(input_data, parameters):
 def parse_survey_stack_parameters(data):
     processed_data = {}
     processed_data["model_version"] = data[0]['data']['modelversion']['value'][0]
@@ -283,7 +381,7 @@ def parse_survey_stack_parameters(data):
                     ])
                 )
                 processed_data["chaining"] = False
-                processed_data["combined_prompt"] = combined_prompt
             else:
                 # Set combined_pre_prompt to None if chaining is enabled
                 processed_data["chaining"] = True
@@ -304,6 +402,16 @@ def parse_survey_stack_parameters(data):
     return processed_data
 def parse_survey_stack_data(data):
     processed_data = {}
     processed_data["input_text"] = data[0]['data']['inputtext']['value']

 from schema_classes import FarmActivities, Interactions, Trial, FarmActivitiesLite, PlantingLite, Log, Soil, Yield
+# This API key must be stored as a "secret" in your environment. It is generated on the website of OpenAI, or of whichever company provides the model you wish to use.
+# To use other models, some of the endpoints would need to change slightly.
+# As is, the endpoint used requires a model that is capable of OpenAI's structured outputs.
 os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")
 client = OpenAI()
 def generate_json(input_data, parameters):
     """
     Function to prompt OpenAI API to generate structured JSON output.
+    Args:
+        input_data: (dict) The input data, preprocessed, from the user. Aka what will fill in the JSON
+            input_data["input_text"] = the preprocessed input text
+            input_data["input_context"] = depending on levers, empty or what is put in front of the prompt
+        parameters: (dict) All of the individual parameters and "flippers"
+            parameters["model_version"] = (str) what model should be used
+            parameters["chaining"] = (bool) whether or not the preprocessed input context should be chained (given to multiple models)
+            parameters["context_pre_prompt"], parameters["summary_pre_prompt"], parameters["conversation_pre_prompt"], parameters["example_pre_prompt"] = (str) all of the pre-prompts, separated
+            parameters["combined_pre_prompt"] = (str) concatenated individual pre-prompts
+    Returns:
+        3 processed data-filled JSON objects: farm_pretty_json, interactions_pretty_json, trial_pretty_json
     """
     input_text = input_data["input_text"]
         trial_prompt = input_data["input_context"] + trial_prompt
     try:
         #Call OpenAI API to generate structured output based on prompt
         return {"error": "Failed to generate valid JSON. " + str(e)}
 # This is for the step-wise JSON creation
+def generate_json_pieces(input_data, parameters):
+    """
+    This is primarily for one of the flippers, which allows each JSON section to be created individually and then concatenates them all together.
+    The hypothesis is that individual calls to the model may be more robust than giving the model all the data at once.
+    Args:
+        input_data: (dict) The input data, preprocessed, from the user. Aka what will fill in the JSON
+            input_data["input_text"] = (str) the preprocessed input text
+            input_data["input_context"] = (str) depending on levers, empty or what is put in front of the prompt
+            input_data["input_text_pieces"] = (dict) containing the individual split up prompt pieces: field_data_input, planting_data_input, logs_data_input, soil_data_input, yield_data_input
+        parameters: (dict) All of the individual parameters and "flippers"
+            parameters["model_version"] = (str) what model should be used
+            parameters["chaining"] = (bool) whether or not the preprocessed input context should be chained (given to multiple models)
+            parameters["context_pre_prompt"], parameters["summary_pre_prompt"], parameters["conversation_pre_prompt"], parameters["example_pre_prompt"] = (str) all of the pre-prompts, separated
+            parameters["combined_pre_prompt"] = (str) concatenated individual pre-prompts
+            parameters["additional_json_pieces_options"] = (str) "Explicit specific pieces" or "Parse from one big input text" to indicate whether it's many function calls on one input text or many function calls on smaller pieces of input texts
+    Returns:
+        (str - json) A final combined JSON containing the data-filled schema for Farm Activities
+    """
+    specification = input_data["input_text"]
+    model_version = parameters["model_version"]
+    additional_json_creation_options = parameters["additional_json_pieces_options"]
+    field_data_input = input_data["input_text_pieces"]["field_data_input"]
+    planting_data_input = input_data["input_text_pieces"]["planting_data_input"]
+    logs_data_input = input_data["input_text_pieces"]["logs_data_input"]
+    soil_data_input = input_data["input_text_pieces"]["soil_data_input"]
+    yield_data_input = input_data["input_text_pieces"]["yield_data_input"]
     if additional_json_creation_options == "Explicit specific pieces":
         field_data_specification = field_data_input
 #    return output1, output2, output3
 def pre_processing(input_data, parameters):
+    """
+    In the event there's a pre-prompt, process the pre-prompts and input text accordingly
+    Args:
+        input_data: (dict) The input data, preprocessed, from the user. Aka what will fill in the JSON
+            input_data["input_text"] = (str) the preprocessed input text
+            input_data["input_context"] = (str) depending on levers, empty or what is put in front of the prompt
+            input_data["input_text_pieces"] = (dict) containing the individual split up prompt pieces: field_data_input, planting_data_input, logs_data_input, soil_data_input, yield_data_input
+        parameters: (dict) All of the individual parameters and "flippers"
+            parameters["model_version"] = (str) what model should be used
+            parameters["chaining"] = (bool) whether or not the preprocessed input context should be chained (given to multiple models)
+            parameters["context_pre_prompt"], parameters["summary_pre_prompt"], parameters["conversation_pre_prompt"], parameters["example_pre_prompt"] = (str) all of the pre-prompts, separated
+            parameters["combined_pre_prompt"] = (str) concatenated individual pre-prompts
+            parameters["additional_json_pieces_options"] = (str) "Explicit specific pieces" or "Parse from one big input text" to indicate whether it's many function calls on one input text or many function calls on smaller pieces of input texts
+    Returns:
+        (dict) input_data
+        input_data["input_context"] = (bool) whether the input text should be used as context or not
+        input_data["input_text"] = (str) input text
+    """
     if parameters["chaining"]:
 def process_specifications(input_data, parameters):
+    """
+    Once the parameters and data are processed, do the pre-processing and then generate JSONs
+    Args:
+        input_data: (dict) The input data, preprocessed, from the user. Aka what will fill in the JSON
+            input_data["input_text"] = (str) the preprocessed input text
+            input_data["input_context"] = (str) depending on levers, empty or what is put in front of the prompt
+            input_data["input_text_pieces"] = (dict) containing the individual split up prompt pieces: field_data_input, planting_data_input, logs_data_input, soil_data_input, yield_data_input
+        parameters: (dict) All of the individual parameters and "flippers"
+            parameters["pre_prompt"] = (bool) whether or not there is a pre-prompt to process through pre_processing()
+            parameters["model_version"] = (str) what model should be used
+            parameters["chaining"] = (bool) whether or not the preprocessed input context should be chained (given to multiple models)
+            parameters["context_pre_prompt"], parameters["summary_pre_prompt"], parameters["conversation_pre_prompt"], parameters["example_pre_prompt"] = (str) all of the pre-prompts, separated
+            parameters["combined_pre_prompt"] = (str) concatenated individual pre-prompts
+            parameters["additional_json_pieces_options"] = (str) "Explicit specific pieces" or "Parse from one big input text" to indicate whether it's many function calls on one input text or many function calls on smaller pieces of input texts
+    Returns:
+        3 processed data-filled JSON objects: farm_pretty_json, interactions_pretty_json, trial_pretty_json
+    """
     # here is where parsing and other things will happen before
     if parameters["pre_prompt"] == True:
         processed_input = pre_processing(input_data, parameters)
 def parse_survey_stack_parameters(data):
+    """
+    Parse the incoming parameters from the parameter survey
+    Args:
+        data: (json) JSON retrieved from surveystack API after retrieving survey info/details
+    Returns:
+        processed_data (dict)
+            processed_data["pre_prompt"] = (bool) whether or not there is a pre-prompt to process through pre_processing()
+            processed_data["model_version"] = (str) what model should be used
+            processed_data["chaining"] = (bool) whether or not the preprocessed input context should be chained (given to multiple models)
+            processed_data["context_pre_prompt"], processed_data["summary_pre_prompt"], processed_data["conversation_pre_prompt"], processed_data["example_pre_prompt"] = (str) all of the pre-prompts, separated
+            processed_data["combined_pre_prompt"] = (str) concatenated individual pre-prompts
+            processed_data["additional_json_pieces_options"] = (str) "Explicit specific pieces" or "Parse from one big input text" to indicate whether it's many function calls on one input text or many function calls on smaller pieces of input texts
+    """
     processed_data = {}
     processed_data["model_version"] = data[0]['data']['modelversion']['value'][0]
                     ])
                 )
                 processed_data["chaining"] = False
+                processed_data["combined_pre_prompt"] = combined_prompt
             else:
                 # Set combined_pre_prompt to None if chaining is enabled
                 processed_data["chaining"] = True
     return processed_data
 def parse_survey_stack_data(data):
+    """
+    Parse the incoming data from the survey stack survey
+    Args:
+        data: (json) JSON retrieved from surveystack API after retrieving survey info/details
+    Returns:
+        processed_data
+            processed_data["input_text"] = (str) the raw input text
+    """
     processed_data = {}
     processed_data["input_text"] = data[0]['data']['inputtext']['value']