Update process_data.py
Browse files- process_data.py +65 -16
process_data.py
CHANGED
|
@@ -16,24 +16,73 @@ from schema_classes import FarmActivities, Interactions, Trial, FarmActivitiesLi
|
|
| 16 |
os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")
|
| 17 |
client = OpenAI()
|
| 18 |
|
| 19 |
-
# What the
|
| 20 |
-
|
| 21 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
# Parameters - The Levers and Flippers to be chosen
|
| 23 |
# Use this for reference for now then work backwards
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
'
|
| 27 |
-
'
|
| 28 |
-
'
|
| 29 |
-
'
|
| 30 |
-
'
|
| 31 |
-
'
|
| 32 |
-
'
|
| 33 |
-
'
|
| 34 |
-
'
|
| 35 |
-
|
| 36 |
-
}
|
| 37 |
|
| 38 |
|
| 39 |
def generate_json(input_data, parameters):
|
|
|
|
| 16 |
os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")
|
| 17 |
client = OpenAI()
|
| 18 |
|
| 19 |
+
# What the survey should look like:
|
| 20 |
+
|
| 21 |
+
# Do you want to enter your text in one big block (free-form style) or in individual pieces (form-based style)? ###
|
| 22 |
+
|
| 23 |
+
# Free-form style means that a single JSON will be created from the block of text. This JSON will have a single level of nesting that was created by the model.
|
| 24 |
+
# Form-based style means that individual JSON pieces will be created from different pieces of text. You will have a few more prompts to enter in this case. These JSON pieces will be manually combined with code. This JSON will also have a single level of nesting that was manually created.
|
| 25 |
+
# Parse either one big block of text conditionally or individual pieces conditionally
|
| 26 |
+
|
| 27 |
+
# What model version do you want to process your input data? ###
|
| 28 |
+
# Parse ['gpt-4o-mini-2024-07-18', 'gpt-4o-2024-08-06']
|
| 29 |
+
|
| 30 |
+
# Do you want to pre-process your data? We will feed your data to a conversational model prior to creating the schema, with a prompt of your choosing.
|
| 31 |
+
# Parse ['yes', 'no']
|
| 32 |
+
|
| 33 |
+
# Do you want to enter multiple pre-prompts? Or will you only be entering one? You can enter up to three separate pre-prompts at this time.
|
| 34 |
+
# Parse ['yes', 'no']
|
| 35 |
+
|
| 36 |
+
# Do you have a specific pre-processing style in mind? This is just for data collection purposes. (Allow to pick multiple)
|
| 37 |
+
|
| 38 |
+
# Not specified means you just want to put text in and it doesn't fit the other categories
|
| 39 |
+
# Summarization means you're going to ask the model to produce some sort of summary as your pre-processing step.
|
| 40 |
+
# Specific field extraction means you're going to ask the models to extract some details as your pre-processing step.
|
| 41 |
+
# Parse possibly multiple values ['not_specified', 'summarization', 'specific-field-extraction']
|
| 42 |
+
|
| 43 |
+
# Parse [1-3 pre_processing_prompts text boxes]
|
| 44 |
+
|
| 45 |
+
# Now it is time to enter your prompts. The number of prompts will be directly related to which input data form you've chosen.
|
| 46 |
+
# This is the input data that will be parsed with this prompt:
|
| 47 |
+
|
| 48 |
+
# Input data here ###
|
| 49 |
+
# This is a sample prompt that you can choose or create your own:
|
| 50 |
+
# These options if free-form with same text for each
|
| 51 |
+
# Schema 1 prompts:
|
| 52 |
+
#farm_prompt = "Extract the farm information."
|
| 53 |
+
# Schema 2 prompts:
|
| 54 |
+
#interactions_prompt = "Extract the interactions information."
|
| 55 |
+
# Schema 3 prompts:
|
| 56 |
+
#trial_prompt = "Extract the trial information."
|
| 57 |
+
|
| 58 |
+
# Schema 1 prompts:
|
| 59 |
+
#field_prompt = "Extract the field information."
|
| 60 |
+
#plant_prompt = "Extract the planting information."
|
| 61 |
+
#log_prompt = "Extract the log information."
|
| 62 |
+
#soil_prompt = "Extract the soil information."
|
| 63 |
+
#yield_prompt = "Extract the yield information."
|
| 64 |
+
# Schema 2 prompts:
|
| 65 |
+
#interaction_prompt = "Extract the interaction information"
|
| 66 |
+
#person_prompt = "Please provide a list of people involved in this interaction, with each person's name, role, and any other relevant details."
|
| 67 |
+
# Schema 3 prompts:
|
| 68 |
+
#trial_prompt = "Extract the trial information"
|
| 69 |
+
#treatment_prompt = "Please provide a list of different treatments (strips or blocks with the same conditions applied) performed by the partner."
|
| 70 |
+
|
| 71 |
+
|
| 72 |
# Parameters - The Levers and Flippers to be chosen
|
| 73 |
# Use this for reference for now then work backwards
|
| 74 |
+
# Otter.ai-summary means that you've already pre-processed your input data using otter.ai and you don't ##### (NOTE: in this event it's just confusing, so don't include this option)
|
| 75 |
+
#all_options = {
|
| 76 |
+
# 'model_version': ['gpt-4o-mini-2024-07-18 (Smaller version for faster responses)', 'gpt-4o-2024-08-06 (Latest GPT model with structured outputs)'],
|
| 77 |
+
# 'input_modality': ['free-text-input / single-JSON-creation (model creates entire JSON) / single-level-nested', 'form-text-input / stepwise-JSON-creation (individual pieces created then manually combined) / no-nesting (flat schema)'],
|
| 78 |
+
# 'pre_processing': ['yes', 'no'],
|
| 79 |
+
# 'pre_processing_multiple': ['yes', 'no'],
|
| 80 |
+
# 'pre_processing_specification': ['not_specified', 'summarization', 'specific-field-extraction'],
|
| 81 |
+
# 'prompting_style': ['no_specific_style', 'example_driven', 'role_specific', 'step_by_step', 'error_detection'],
|
| 82 |
+
# 'input_text' = ['whole_input_text': "value", 'input_text_pieces': ["piece_1": "value", "piece_2": "value"]],
|
| 83 |
+
# 'pre_processing_prompt' = ['pre_processing_prompts': ["prompt_1": "value", "prompt_2": "value"]],
|
| 84 |
+
# 'prompt' = ['prompts': ["prompt1": "value", "prompt2": "value"]]
|
| 85 |
+
#}
|
|
|
|
| 86 |
|
| 87 |
|
| 88 |
def generate_json(input_data, parameters):
|