Spaces:

our-sci
/

data-translation-experiments

Sleeping

App Files Files Community

rosemariafontana committed on Nov 17, 2024

Commit

9448805

verified ·

1 Parent(s): 14b84fe

Update process_data.py

Browse files

Files changed (1) hide show

process_data.py +65 -16

process_data.py CHANGED Viewed

@@ -16,24 +16,73 @@ from schema_classes import FarmActivities, Interactions, Trial, FarmActivitiesLi
 os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")
 client = OpenAI()
-# What the data input survey should look like:
-# Do you want to enter your text in one big block (free-form style) or in individual pieces (form-based style)?
-# The free-form block will be used
 # Parameters - The Levers and Flippers to be chosen
 # Use this for reference for now then work backwards
-all_options = {
-    'model_version': ['gpt-4o-2024-08-06', 'gpt-4o-2024-08-06'],
-    'input_modality': ['free-text-input', 'form-text-input'],
-    'json_nested': ['single-level-nested', 'no-nesting (flat schema)'],
-    'creation_style': ['single-JSON-creation (model creates entire JSON)', 'stepwise-JSON-creation (individual pieces created then manually combined)'],
-    'pre_processing': ['yes', 'no'],
-    'pre_processing_multiple': ['yes', 'no'],
-    'pre_processing_specification': ['not_specified', 'summarization', 'otter.ai-summary', 'specific-field-extraction'],
-    'prompting_style': ['no_specific_style', 'example_driven', 'role_specific', 'step_by_step', 'error_detection'],
-    'input_text' = ['whole_input_text': "value", 'input_text_pieces': ["piece_1": "value", "piece_2": "value"]]
-    'pre_processing_prompt' = ['pre_processing_prompts': ["prompt_1": "value", "prompt_2": "value"]]
-    'prompt' = ['prompt_text': "value"]
-}
 def generate_json(input_data, parameters):

 os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")
 client = OpenAI()
+# What the survey should look like:
+# Do you want to enter your text in one big block (free-form style) or in individual pieces (form-based style)? ###
+# Free-form style means that a single JSON will be created from the block of text. This JSON will have a single level of nesting that was created by the model.
+# Form-based style means that individual JSON pieces will be created from different pieces of text. You will have a few more prompts to enter in this case. These JSON pieces will be manually combined with code. This JSON will also have a single level of nesting that was manually created.
+# Parse either one big block of text conditionally or individual pieces conditionally
+# What model version do you want to process your input data?  ###
+# Parse ['gpt-4o-mini-2024-07-18', 'gpt-4o-2024-08-06']
+# Do you want to pre-process your data? We will feed your data to a conversational model prior to creating the schema, with a prompt of your choosing.
+# Parse ['yes', 'no']
+# Do you want to enter multiple pre-prompts? Or will you only be entering one? You can enter up to three separate pre-prompts at this time.
+# Parse ['yes', 'no']
+# Do you have a specific pre-processing style in mind? This is just for data collection purposes. (Allow to pick multiple)
+# Not specified means you just want to put text in and it doesn't fit the other categories
+# Summarization means you're going to ask the model to produce some sort of summary as your pre-processing step.
+# Specific field extraction means you're going to ask the models to extract some details as your pre-processing step.
+# Parse possibly multiple values ['not_specified', 'summarization', 'specific-field-extraction']
+# Parse [1-3 pre_processing_prompts text boxes]
+# Now it is time to enter your prompts. The number of prompts will be directly related to which input data form you've chosen.
+# This is the input data that will be parsed with this prompt:
+# Input data here ###
+# This is a sample prompt that you can choose or create your own:
+    # These options if free-form with same text for each
+    # Schema 1 prompts:
+    #farm_prompt = "Extract the farm information."
+    # Schema 2 prompts:
+    #interactions_prompt = "Extract the interactions information."
+    # Schema 3 prompts:
+    #trial_prompt = "Extract the trial information."
+    # Schema 1 prompts:
+    #field_prompt = "Extract the field information."
+    #plant_prompt = "Extract the planting information."
+    #log_prompt = "Extract the log information."
+    #soil_prompt = "Extract the soil information."
+    #yield_prompt = "Extract the yield information."
+    # Schema 2 prompts:
+    #interaction_prompt = "Extract the interaction information"
+    #person_prompt = "Please provide a list of people involved in this interaction, with each person's name, role, and any other relevant details."
+    # Schema 3 prompts:
+    #trial_prompt = "Extract the trial information"
+    #treatment_prompt = "Please provide a list of different treatments (strips or blocks with the same conditions applied) performed by the partner."
 # Parameters - The Levers and Flippers to be chosen
 # Use this for reference for now then work backwards
+# Otter.ai-summary means that you've already pre-processed your input data using otter.ai. In this event the option is just confusing, so don't include it.
+#all_options = {
+#    'model_version': ['gpt-4o-mini-2024-07-18 (Smaller version for faster responses)', 'gpt-4o-2024-08-06 (Latest GPT model with structured outputs)'],
+#    'input_modality': ['free-text-input / single-JSON-creation (model creates entire JSON) / single-level-nested', 'form-text-input / stepwise-JSON-creation (individual pieces created then manually combined) / no-nesting (flat schema)'],
+#    'pre_processing': ['yes', 'no'],
+#    'pre_processing_multiple': ['yes', 'no'],
+#    'pre_processing_specification': ['not_specified', 'summarization', 'specific-field-extraction'],
+#    'prompting_style': ['no_specific_style', 'example_driven', 'role_specific', 'step_by_step', 'error_detection'],
+#    'input_text': {'whole_input_text': "value", 'input_text_pieces': {"piece_1": "value", "piece_2": "value"}},
+#    'pre_processing_prompt': {'pre_processing_prompts': {"prompt_1": "value", "prompt_2": "value"}},
+#    'prompt': {'prompts': {"prompt1": "value", "prompt2": "value"}}
+#}
 def generate_json(input_data, parameters):