rosemariafontana committed on
Commit
9448805
·
verified ·
1 Parent(s): 14b84fe

Update process_data.py

Browse files
Files changed (1) hide show
  1. process_data.py +65 -16
process_data.py CHANGED
@@ -16,24 +16,73 @@ from schema_classes import FarmActivities, Interactions, Trial, FarmActivitiesLi
16
  os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")
17
  client = OpenAI()
18
 
19
- # What the data input survey should look like:
20
- # Do you want to enter your text in one big block (free-form style) or in individual pieces (form-based style)?
21
- # The free-form block will be used
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  # Parameters - The Levers and Flippers to be chosen
23
  # Use this for reference for now then work backwards
24
- all_options = {
25
- 'model_version': ['gpt-4o-2024-08-06', 'gpt-4o-2024-08-06'],
26
- 'input_modality': ['free-text-input', 'form-text-input'],
27
- 'json_nested': ['single-level-nested', 'no-nesting (flat schema)'],
28
- 'creation_style': ['single-JSON-creation (model creates entire JSON)', 'stepwise-JSON-creation (individual pieces created then manually combined)'],
29
- 'pre_processing': ['yes', 'no'],
30
- 'pre_processing_multiple': ['yes', 'no'],
31
- 'pre_processing_specification': ['not_specified', 'summarization', 'otter.ai-summary', 'specific-field-extraction'],
32
- 'prompting_style': ['no_specific_style', 'example_driven', 'role_specific', 'step_by_step', 'error_detection'],
33
- 'input_text' = ['whole_input_text': "value", 'input_text_pieces': ["piece_1": "value", "piece_2": "value"]]
34
- 'pre_processing_prompt' = ['pre_processing_prompts': ["prompt_1": "value", "prompt_2": "value"]]
35
- 'prompt' = ['prompt_text': "value"]
36
- }
37
 
38
 
39
  def generate_json(input_data, parameters):
 
16
  os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")
17
  client = OpenAI()
18
 
19
+ # What the survey should look like:
20
+
21
+ # Do you want to enter your text in one big block (free-form style) or in individual pieces (form-based style)? ###
22
+
23
+ # Free-form style means that a single JSON will be created from the block of text. This JSON will have a single level of nesting that was created by the model.
24
+ # Form-based style means that individual JSON pieces will be created from different pieces of text. You will have a few more prompts to enter in this case. These JSON pieces will be manually combined with code. This JSON will also have a single level of nesting that was manually created.
25
+ # Parse either one big block of text conditionally or individual pieces conditionally
26
+
27
+ # What model version do you want to process your input data? ###
28
+ # Parse ['gpt-4o-mini-2024-07-18', 'gpt-4o-2024-08-06']
29
+
30
+ # Do you want to pre-process your data? We will feed your data to a conversational model prior to creating the schema, with a prompt of your choosing.
31
+ # Parse ['yes', 'no']
32
+
33
+ # Do you want to enter multiple pre-prompts? Or will you only be entering one? You can enter up to three separate pre-prompts at this time.
34
+ # Parse ['yes', 'no']
35
+
36
+ # Do you have a specific pre-processing style in mind? This is just for data collection purposes. (Allow to pick multiple)
37
+
38
+ # Not specified means you just want to put text in and it doesn't fit the other categories
39
+ # Summarization means you're going to ask the model to produce some sort of summary as your pre-processing step.
40
+ # Specific field extraction means you're going to ask the models to extract some details as your pre-processing step.
41
+ # Parse possibly multiple values ['not_specified', 'summarization', 'specific-field-extraction']
42
+
43
+ # Parse [1-3 pre_processing_prompts text boxes]
44
+
45
+ # Now it is time to enter your prompts. The number of prompts will be directly related to which input data form you've chosen.
46
+ # This is the input data that will be parsed with this prompt:
47
+
48
+ # Input data here ###
49
+ # This is a sample prompt that you can choose or create your own:
50
+ # These options if free-form with same text for each
51
+ # Schema 1 prompts:
52
+ #farm_prompt = "Extract the farm information."
53
+ # Schema 2 prompts:
54
+ #interactions_prompt = "Extract the interactions information."
55
+ # Schema 3 prompts:
56
+ #trial_prompt = "Extract the trial information."
57
+
58
+ # Schema 1 prompts:
59
+ #field_prompt = "Extract the field information."
60
+ #plant_prompt = "Extract the planting information."
61
+ #log_prompt = "Extract the log information."
62
+ #soil_prompt = "Extract the soil information."
63
+ #yield_prompt = "Extract the yield information."
64
+ # Schema 2 prompts:
65
+ #interaction_prompt = "Extract the interaction information"
66
+ #person_prompt = "Please provide a list of people involved in this interaction, with each person's name, role, and any other relevant details."
67
+ # Schema 3 prompts:
68
+ #trial_prompt = "Extract the trial information"
69
+ #treatment_prompt = "Please provide a list of different treatments (strips or blocks with the same conditions applied) performed by the partner."
70
+
71
+
72
  # Parameters - The Levers and Flippers to be chosen
73
  # Use this for reference for now then work backwards
74
+ # Otter.ai-summary means that you've already pre-processed your input data using otter.ai and you don't ##### In this event it's just confusing, so don't include it.
75
+ #all_options = {
76
+ # 'model_version': ['gpt-4o-mini-2024-07-18 (Smaller version for faster responses)', 'gpt-4o-2024-08-06 (Latest GPT model with structured outputs)'],
77
+ # 'input_modality': ['free-text-input / single-JSON-creation (model creates entire JSON) / single-level-nested', 'form-text-input / stepwise-JSON-creation (individual pieces created then manually combined) / no-nesting (flat schema)'],
78
+ # 'pre_processing': ['yes', 'no'],
79
+ # 'pre_processing_multiple': ['yes', 'no'],
80
+ # 'pre_processing_specification': ['not_specified', 'summarization', 'specific-field-extraction'],
81
+ # 'prompting_style': ['no_specific_style', 'example_driven', 'role_specific', 'step_by_step', 'error_detection'],
82
+ # 'input_text' = ['whole_input_text': "value", 'input_text_pieces': ["piece_1": "value", "piece_2": "value"]],
83
+ # 'pre_processing_prompt' = ['pre_processing_prompts': ["prompt_1": "value", "prompt_2": "value"]],
84
+ # 'prompt' = ['prompts': ["prompt1": "value", "prompt2": "value"]]
85
+ #}
 
86
 
87
 
88
  def generate_json(input_data, parameters):