Spaces:

our-sci
/

data-translation-experiments

Sleeping

App Files Files Community

rosemariafontana committed on Nov 13, 2024

Commit

9fe6dfb

verified ·

1 Parent(s): c821785

Update process_data.py

Browse files

Files changed (1) hide show

process_data.py +103 -97

process_data.py CHANGED Viewed

@@ -132,26 +132,19 @@ def generate_json_pieces(input_data, parameters):
     """
     specification = input_data["input_text"]
     model_version = parameters["model_version"]
-    additional_json_creation_options = parameters["additional_json_pieces_options"]
-    field_data_input = input_data["input_text_pieces"]["field_data_input"]
-    planting_data_input = input_data["input_text_pieces"]["planting_data_input"]
-    logs_data_input = input_data["input_text_pieces"]["logs_data_input"]
-    soil_data_input = input_data["input_text_pieces"]["soil_data_input"]
-    yield_data_input = input_data["input_text_pieces"]["yield_data_input"]
-    if additional_json_creation_options == "Explicit specific pieces":
-        field_data_specification = field_data_input
-        planting_data_specification = planting_data_input
-        logs_data_specification = logs_data_input
-        soil_data_specification = soil_data_input
-        yield_data_specification = yield_data_input
-    elif additional_json_creation_options == "Parse from one big input text":
-        field_data_specification = specification
-        planting_data_specification = specification
-        logs_data_specification = specification
-        soil_data_specification = specification
-        yield_data_specification = specification
     try:
         # Call OpenAI API to generate structured output based on prompt
@@ -214,21 +207,6 @@ def generate_json_pieces(input_data, parameters):
     except Exception as e:
         return {"error": "Failed to generate valid JSON. " + str(e)}
-#def process_specifications(data, model_version, json_creation, additional_json_creation_options, field_data_input, planting_data_input, logs_data_input, soil_data_input, yield_data_input):
-#    # This method just drives the process
-    # Uncomment when working on flippers
-    #if json_creation == "Single JSON Creation":
-    #    resulting_schema = generate_json(data, model_version)
-    #elif json_creation == "Step-wise JSON Creation":
-    #    resulting_schema = generate_json_pieces(data, model_version, additional_json_creation_options, field_data_input, planting_data_input, logs_data_input, soil_data_input, yield_data_input)
-    #return resulting_schema
-#    global original_outputs, xml_outputs
-#    output1, output2, output3 = generate_json(data, model_version)
-#    return output1, output2, output3
 def pre_processing(input_data, parameters):
     """
@@ -252,51 +230,73 @@ def pre_processing(input_data, parameters):
         input_data["input_context"] = (bool) whether the input text should be used as context or not
         input_data["input_text"] = (str) input text
     """
-    if parameters["chaining"]:
-        input_text = input_data["input_text"]
-        pre_processing_list = [parameters["context_pre_prompt"], parameters["summary_pre_prompt"], parameters["conversation_pre_prompt"], parameters["example_pre_prompt"]]
-        print("PreProcessingList")
-        print(pre_processing_list)
-        for pre_prompt in pre_processing_list:
-            try:
-                print("Pre-Processing: ")
-                if pre_prompt:
-                    print("Prompt: ")
-                    print(pre_prompt)
-                    print("Input Text: ")
-                    print(input_text)
-                    print("Model: ")
-                    print(parameters["model_version"])
-                    response = client.chat.completions.create(
-                        model=parameters["model_version"],
-                        messages=[
-                            {"role": "system", "content": pre_prompt},
-                            {"role": "user", "content": input_text}
-                        ]
-                    )
-                    response_text = response.choices[0].message.content
-                    print("Response Text: ")
-                    print(response_text)
-                    input_text = response_text
-            except Exception as e:
-                print(f"Failed to parse response as JSON. Error was: {e}")
-        input_data["input_context"] = False
-        input_data["input_text"] = input_text
-        return input_data
-    else:
-        input_context = f"You are processing farm activity, interactions, and trial data. Here's important context of the data {parameters['combined_prompt']}. With this context in mind, "
-        input_data["input_context"] = input_context
-        return input_data
@@ -322,13 +322,19 @@ def process_specifications(input_data, parameters):
         3 processed data-filled JSON objects: farm_pretty_json, interactions_pretty_json, trial_pretty_json
     """
     # here is where parsing and other things will happen before
-    if parameters["pre_prompt"] == True:
-        processed_input = pre_processing(input_data, parameters)
-        return generate_json(processed_input, parameters)
-    else:
         input_data["input_context"] = False
-        input_data["input_text"] = input_data["input_text"]
-        return generate_json(input_data, parameters)
 def parse_survey_stack_parameters(data):
@@ -414,26 +420,26 @@ def parse_survey_stack_data(data):
     """
     processed_data = {}
     processed_data["stepwise_json_creation"] = data[0]['data']['stepwisejsoncreation']['value']
     if processed_data["stepwise_json_creation"] == "stepwisejsoncreation":
-        processed_data["stepwise_json_inputs"] = {}
-        processed_data["stepwise_json_inputs"]["field_data_input"] = data[0]['data']['field_data_input']['value']
-        processed_data["stepwise_json_inputs"]["planting_data_input"] = data[0]['data']['planting_data_input']['value']
-        processed_data["stepwise_json_inputs"]["log_data_input"] = data[0]['data']['log_data_input']['value']
-        processed_data["stepwise_json_inputs"]["soil_data_input"] = data[0]['data']['soil_data_input']['value']
-        processed_data["stepwise_json_inputs"]["yield_data_input"] = data[0]['data']['yield_data_input']['value']
         processed_data["input_text"] = "EMPTY"
     elif processed_data["stepwise_json_creation"] == "singlejsoncreation":
         processed_data["input_text"] = data[0]['data']['onelonginputtext']['value']
-        processed_data["stepwise_json_inputs"] = {}
-        processed_data["stepwise_json_inputs"]["field_data_input"] = "EMPTY"
-        processed_data["stepwise_json_inputs"]["planting_data_input"] = "EMPTY"
-        processed_data["stepwise_json_inputs"]["log_data_input"] = "EMPTY"
-        processed_data["stepwise_json_inputs"]["soil_data_input"] = "EMPTY"
-        processed_data["stepwise_json_inputs"]["yield_data_input"] = "EMPTY"
     return processed_data

     """
     specification = input_data["input_text"]
     model_version = parameters["model_version"]
+    if parameters["pre_prompt"] == True:
+        field_data_input = input_data["input_text_pieces"]["field_data_input"]
+        planting_data_input = input_data["input_text_pieces"]["planting_data_input"]
+        logs_data_input = input_data["input_text_pieces"]["logs_data_input"]
+        soil_data_input = input_data["input_text_pieces"]["soil_data_input"]
+        yield_data_input = input_data["input_text_pieces"]["yield_data_input"]
+    else:
+        field_data_input = input_data["input_text_pieces"]["pre_processed_pieces"]["field_data_input"]
+        planting_data_input = input_data["input_text_pieces"]["pre_processed_pieces"]["planting_data_input"]
+        logs_data_input = input_data["input_text_pieces"]["pre_processed_pieces"]["logs_data_input"]
+        soil_data_input = input_data["input_text_pieces"]["pre_processed_pieces"]["soil_data_input"]
+        yield_data_input = input_data["input_text_pieces"]["pre_processed_pieces"]["yield_data_input"]
     try:
         # Call OpenAI API to generate structured output based on prompt
     except Exception as e:
         return {"error": "Failed to generate valid JSON. " + str(e)}
 def pre_processing(input_data, parameters):
     """
         input_data["input_context"] = (bool) whether the input text should be used as context or not
         input_data["input_text"] = (str) input text
     """
+    if parameters["stepwise_json_creation"] == "stepwisejsoncreation":
+        pre_processing_prompts = [parameters["context_pre_prompt"], parameters["summary_pre_prompt"], parameters["conversation_pre_prompt"], parameters["example_pre_prompt"]]
+        one_giant_preprompt = ""
+        input_data["input_text_pieces"]["pre_processed_pieces"] = {}
+        for pre_prompt in pre_processing_prompts:
+            if pre_prompt:
+                one_giant_preprompt += pre_prompt
+        for text_label, text_body in input_data["input_text_pieces"].items():
+            response = client.chat.completions.create(
+                            model=parameters["model_version"],
+                            messages=[
+                                {"role": "system", "content": one_giant_preprompt},
+                                {"role": "user", "content": text_body}
+                            ]
+                        )
+            response_text = response.choices[0].message.content
+            input_data["input_text_pieces"]["pre_processed_pieces"][text_label] = response_text
+    if parameters["stepwise_json_creation"] == "singlejsoncreation":
+        if parameters["chaining"]:
+            input_text = input_data["input_text"]
+            pre_processing_list = [parameters["context_pre_prompt"], parameters["summary_pre_prompt"], parameters["conversation_pre_prompt"], parameters["example_pre_prompt"]]
+            print("PreProcessingList")
+            print(pre_processing_list)
+            for pre_prompt in pre_processing_list:
+                try:
+                    print("Pre-Processing: ")
+                    if pre_prompt:
+                        print("Prompt: ")
+                        print(pre_prompt)
+                        print("Input Text: ")
+                        print(input_text)
+                        print("Model: ")
+                        print(parameters["model_version"])
+                        response = client.chat.completions.create(
+                            model=parameters["model_version"],
+                            messages=[
+                                {"role": "system", "content": pre_prompt},
+                                {"role": "user", "content": input_text}
+                            ]
+                        )
+                        response_text = response.choices[0].message.content
+                        print("Response Text: ")
+                        print(response_text)
+                        input_text = response_text
+                except Exception as e:
+                    print(f"Failed to parse response as JSON. Error was: {e}")
+            input_data["input_context"] = False
+            input_data["input_text"] = input_text
+            return input_data
+        else:
+            input_context = f"You are processing farm activity, interactions, and trial data. Here's important context of the data {parameters['combined_prompt']}. With this context in mind, "
+            input_data["input_context"] = input_context
+            return input_data
         3 processed data-filled JSON objects: farm_pretty_json, interactions_pretty_json, trial_pretty_json
     """
     # here is where parsing and other things will happen before
+    if parameters["stepwise_json_creation"] == "stepwisejsoncreation":
+        if parameters["pre_prompt"] == True:
+            processed_input = pre_processing(input_data, parameters)
+        else:
+            processed_input = input_data
+        return generate_json_pieces(processed_input, parameters)
+    elif parameters["stepwise_json_creation"] == "singlejsoncreation":
         input_data["input_context"] = False
+        if parameters["pre_prompt"] == True:
+            processed_input = pre_processing(input_data, parameters)
+        else:
+            processed_input = input_data
+        return generate_json(processed_input, parameters)
 def parse_survey_stack_parameters(data):
     """
     processed_data = {}
+    farm_management_inputs = data[0]['data']['group_4']
     processed_data["stepwise_json_creation"] = data[0]['data']['stepwisejsoncreation']['value']
     if processed_data["stepwise_json_creation"] == "stepwisejsoncreation":
+        processed_data["input_text_pieces"] = {}
+        processed_data["input_text_pieces"]["field_data_input"] = farm_management_inputs.get('field_data_input', {}).get('value', None)
+        processed_data["input_text_pieces"]["planting_data_input"] = farm_management_inputs.get('planting_data_input', {}).get('value', None)
+        processed_data["input_text_pieces"]["log_data_input"] = farm_management_inputs.get('log_data_input', {}).get('value', None)
+        processed_data["input_text_pieces"]["soil_data_input"] = farm_management_inputs.get('soil_data_input', {}).get('value', None)
+        processed_data["input_text_pieces"]["yield_data_input"] = farm_management_inputs.get('yield_data_input', {}).get('value', None)
         processed_data["input_text"] = "EMPTY"
     elif processed_data["stepwise_json_creation"] == "singlejsoncreation":
         processed_data["input_text"] = data[0]['data']['onelonginputtext']['value']
+        processed_data["input_text_pieces"] = {}
+        processed_data["input_text_pieces"]["field_data_input"] = "EMPTY"
+        processed_data["input_text_pieces"]["planting_data_input"] = "EMPTY"
+        processed_data["input_text_pieces"]["log_data_input"] = "EMPTY"
+        processed_data["input_text_pieces"]["soil_data_input"] = "EMPTY"
+        processed_data["input_text_pieces"]["yield_data_input"] = "EMPTY"
     return processed_data