Update process_data.py
Browse files- process_data.py +52 -23
process_data.py
CHANGED
|
@@ -145,13 +145,30 @@ def generate_json_pieces(input_data, parameters):
|
|
| 145 |
logs_data_input = input_data["input_text_pieces"]["pre_processed_pieces"]["logs_data_input"]
|
| 146 |
soil_data_input = input_data["input_text_pieces"]["pre_processed_pieces"]["soil_data_input"]
|
| 147 |
yield_data_input = input_data["input_text_pieces"]["pre_processed_pieces"]["yield_data_input"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 148 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 149 |
try:
|
| 150 |
# Call OpenAI API to generate structured output based on prompt
|
| 151 |
field_response = client.beta.chat.completions.parse(
|
| 152 |
model=model_version, # Use GPT model that supports structured output
|
| 153 |
messages=[
|
| 154 |
-
{"role": "system", "content":
|
| 155 |
{"role": "user", "content": field_data_specification}
|
| 156 |
],
|
| 157 |
response_format=FarmActivitiesLite,
|
|
@@ -160,7 +177,7 @@ def generate_json_pieces(input_data, parameters):
|
|
| 160 |
plant_response = client.beta.chat.completions.parse(
|
| 161 |
model=model_version, # Use GPT model that supports structured output
|
| 162 |
messages=[
|
| 163 |
-
{"role": "system", "content":
|
| 164 |
{"role": "user", "content": planting_data_specification}
|
| 165 |
],
|
| 166 |
response_format=PlantingLite,
|
|
@@ -169,7 +186,7 @@ def generate_json_pieces(input_data, parameters):
|
|
| 169 |
log_response = client.beta.chat.completions.parse(
|
| 170 |
model=model_version, # Use GPT model that supports structured output
|
| 171 |
messages=[
|
| 172 |
-
{"role": "system", "content":
|
| 173 |
{"role": "user", "content": logs_data_specification}
|
| 174 |
],
|
| 175 |
response_format=Log,
|
|
@@ -178,7 +195,7 @@ def generate_json_pieces(input_data, parameters):
|
|
| 178 |
soil_response = client.beta.chat.completions.parse(
|
| 179 |
model=model_version, # Use GPT model that supports structured output
|
| 180 |
messages=[
|
| 181 |
-
{"role": "system", "content":
|
| 182 |
{"role": "user", "content": soil_data_specification}
|
| 183 |
],
|
| 184 |
response_format=Soil,
|
|
@@ -187,7 +204,7 @@ def generate_json_pieces(input_data, parameters):
|
|
| 187 |
yield_response = client.beta.chat.completions.parse(
|
| 188 |
model=model_version, # Use GPT model that supports structured output
|
| 189 |
messages=[
|
| 190 |
-
{"role": "system", "content":
|
| 191 |
{"role": "user", "content": yield_data_specification}
|
| 192 |
],
|
| 193 |
response_format=Yield,
|
|
@@ -231,26 +248,32 @@ def pre_processing(input_data, parameters):
|
|
| 231 |
input_data["input_text"] = (str) input text
|
| 232 |
"""
|
| 233 |
if parameters["stepwise_json_creation"] == "stepwisejsoncreation":
|
| 234 |
-
pre_processing_prompts = [parameters["context_pre_prompt"], parameters["summary_pre_prompt"], parameters["conversation_pre_prompt"], parameters["example_pre_prompt"]]
|
| 235 |
-
one_giant_preprompt = ""
|
| 236 |
-
input_data["input_text_pieces"]["pre_processed_pieces"] = {}
|
| 237 |
-
|
| 238 |
-
for pre_prompt in pre_processing_prompts:
|
| 239 |
-
if pre_prompt:
|
| 240 |
-
one_giant_preprompt += pre_prompt
|
| 241 |
|
| 242 |
-
|
|
|
|
|
|
|
|
|
|
| 243 |
|
| 244 |
-
|
| 245 |
-
|
| 246 |
-
|
| 247 |
-
|
| 248 |
-
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 254 |
|
| 255 |
|
| 256 |
if parameters["stepwise_json_creation"] == "singlejsoncreation":
|
|
@@ -418,13 +441,16 @@ def parse_survey_stack_data(data):
|
|
| 418 |
processed_data
|
| 419 |
processed_data["input_text"] = (str) the raw input text
|
| 420 |
"""
|
|
|
|
| 421 |
processed_data = {}
|
| 422 |
|
| 423 |
farm_management_inputs = data[0]['data']['group_4']
|
| 424 |
|
| 425 |
processed_data["stepwise_json_creation"] = data[0]['data']['stepwisejsoncreation']['value']
|
|
|
|
| 426 |
|
| 427 |
if processed_data["stepwise_json_creation"] == "stepwisejsoncreation":
|
|
|
|
| 428 |
processed_data["input_text_pieces"] = {}
|
| 429 |
processed_data["input_text_pieces"]["field_data_input"] = farm_management_inputs.get('field_data_input', {}).get('value', None)
|
| 430 |
processed_data["input_text_pieces"]["planting_data_input"] = farm_management_inputs.get('planting_data_input', {}).get('value', None)
|
|
@@ -433,6 +459,7 @@ def parse_survey_stack_data(data):
|
|
| 433 |
processed_data["input_text_pieces"]["yield_data_input"] = farm_management_inputs.get('yield_data_input', {}).get('value', None)
|
| 434 |
processed_data["input_text"] = "EMPTY"
|
| 435 |
elif processed_data["stepwise_json_creation"] == "singlejsoncreation":
|
|
|
|
| 436 |
processed_data["input_text"] = data[0]['data']['onelonginputtext']['value']
|
| 437 |
processed_data["input_text_pieces"] = {}
|
| 438 |
processed_data["input_text_pieces"]["field_data_input"] = "EMPTY"
|
|
@@ -441,5 +468,7 @@ def parse_survey_stack_data(data):
|
|
| 441 |
processed_data["input_text_pieces"]["soil_data_input"] = "EMPTY"
|
| 442 |
processed_data["input_text_pieces"]["yield_data_input"] = "EMPTY"
|
| 443 |
|
|
|
|
|
|
|
| 444 |
|
| 445 |
return processed_data
|
|
|
|
| 145 |
logs_data_input = input_data["input_text_pieces"]["pre_processed_pieces"]["logs_data_input"]
|
| 146 |
soil_data_input = input_data["input_text_pieces"]["pre_processed_pieces"]["soil_data_input"]
|
| 147 |
yield_data_input = input_data["input_text_pieces"]["pre_processed_pieces"]["yield_data_input"]
|
| 148 |
+
|
| 149 |
+
field_prompt = "Extract the field information."
|
| 150 |
+
plant_prompt = "Extract the planting information."
|
| 151 |
+
log_prompt = "Extract the log information."
|
| 152 |
+
soil_prompt = "Extract the soil information."
|
| 153 |
+
yield_prompt = "Extract the yield information."
|
| 154 |
+
|
| 155 |
+
# fix this part
|
| 156 |
|
| 157 |
+
# figure out what happens when there's
|
| 158 |
+
# chaining, pre-prompts, context, etc ....
|
| 159 |
+
|
| 160 |
+
#if not parameters["chaining"] and input_data["input_context"]:
|
| 161 |
+
# farm_prompt = input_data["input_text_pieces"]["field_data_input"] + farm_prompt
|
| 162 |
+
# farm_prompt = input_data["input_text_pieces"]["field_data_input"] + farm_prompt
|
| 163 |
+
# farm_prompt = input_data["input_text_pieces"]["field_data_input"] + farm_prompt
|
| 164 |
+
# farm_prompt = input_data["input_text_pieces"]["field_data_input"] + farm_prompt
|
| 165 |
+
|
| 166 |
try:
|
| 167 |
# Call OpenAI API to generate structured output based on prompt
|
| 168 |
field_response = client.beta.chat.completions.parse(
|
| 169 |
model=model_version, # Use GPT model that supports structured output
|
| 170 |
messages=[
|
| 171 |
+
{"role": "system", "content": field_prompt},
|
| 172 |
{"role": "user", "content": field_data_specification}
|
| 173 |
],
|
| 174 |
response_format=FarmActivitiesLite,
|
|
|
|
| 177 |
plant_response = client.beta.chat.completions.parse(
|
| 178 |
model=model_version, # Use GPT model that supports structured output
|
| 179 |
messages=[
|
| 180 |
+
{"role": "system", "content": plant_prompt},
|
| 181 |
{"role": "user", "content": planting_data_specification}
|
| 182 |
],
|
| 183 |
response_format=PlantingLite,
|
|
|
|
| 186 |
log_response = client.beta.chat.completions.parse(
|
| 187 |
model=model_version, # Use GPT model that supports structured output
|
| 188 |
messages=[
|
| 189 |
+
{"role": "system", "content": log_prompt},
|
| 190 |
{"role": "user", "content": logs_data_specification}
|
| 191 |
],
|
| 192 |
response_format=Log,
|
|
|
|
| 195 |
soil_response = client.beta.chat.completions.parse(
|
| 196 |
model=model_version, # Use GPT model that supports structured output
|
| 197 |
messages=[
|
| 198 |
+
{"role": "system", "content": soil_prompt},
|
| 199 |
{"role": "user", "content": soil_data_specification}
|
| 200 |
],
|
| 201 |
response_format=Soil,
|
|
|
|
| 204 |
yield_response = client.beta.chat.completions.parse(
|
| 205 |
model=model_version, # Use GPT model that supports structured output
|
| 206 |
messages=[
|
| 207 |
+
{"role": "system", "content": yield_prompt},
|
| 208 |
{"role": "user", "content": yield_data_specification}
|
| 209 |
],
|
| 210 |
response_format=Yield,
|
|
|
|
| 248 |
input_data["input_text"] = (str) input text
|
| 249 |
"""
|
| 250 |
if parameters["stepwise_json_creation"] == "stepwisejsoncreation":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 251 |
|
| 252 |
+
if parameters["chaining"]:
|
| 253 |
+
pre_processing_prompts = [parameters["context_pre_prompt"], parameters["summary_pre_prompt"], parameters["conversation_pre_prompt"], parameters["example_pre_prompt"]]
|
| 254 |
+
one_giant_preprompt = ""
|
| 255 |
+
input_data["input_text_pieces"]["pre_processed_pieces"] = {}
|
| 256 |
|
| 257 |
+
for pre_prompt in pre_processing_prompts:
|
| 258 |
+
if pre_prompt:
|
| 259 |
+
one_giant_preprompt += pre_prompt
|
| 260 |
+
|
| 261 |
+
for text_label, text_body in input_data["input_text_pieces"].items():
|
| 262 |
+
|
| 263 |
+
response = client.chat.completions.create(
|
| 264 |
+
model=parameters["model_version"],
|
| 265 |
+
messages=[
|
| 266 |
+
{"role": "system", "content": one_giant_preprompt},
|
| 267 |
+
{"role": "user", "content": text_body}
|
| 268 |
+
]
|
| 269 |
+
)
|
| 270 |
+
|
| 271 |
+
response_text = response.choices[0].message.content
|
| 272 |
+
input_data["input_text_pieces"]["pre_processed_pieces"][text_label] = response_text
|
| 273 |
+
else:
|
| 274 |
+
input_context = f"You are processing farm activity, interactions, and trial data. Here's important context of the data {parameters['combined_prompt']}. With this context in mind, "
|
| 275 |
+
input_data["input_context"] = input_context
|
| 276 |
+
return input_data
|
| 277 |
|
| 278 |
|
| 279 |
if parameters["stepwise_json_creation"] == "singlejsoncreation":
|
|
|
|
| 441 |
processed_data
|
| 442 |
processed_data["input_text"] = (str) the raw input text
|
| 443 |
"""
|
| 444 |
+
print("PROCESSING SURVEY STACK DATA")
|
| 445 |
processed_data = {}
|
| 446 |
|
| 447 |
farm_management_inputs = data[0]['data']['group_4']
|
| 448 |
|
| 449 |
processed_data["stepwise_json_creation"] = data[0]['data']['stepwisejsoncreation']['value']
|
| 450 |
+
print("STEPWISE?: " + str(processed_data["stepwise_json_creation"]))
|
| 451 |
|
| 452 |
if processed_data["stepwise_json_creation"] == "stepwisejsoncreation":
|
| 453 |
+
print("IN THE STEP")
|
| 454 |
processed_data["input_text_pieces"] = {}
|
| 455 |
processed_data["input_text_pieces"]["field_data_input"] = farm_management_inputs.get('field_data_input', {}).get('value', None)
|
| 456 |
processed_data["input_text_pieces"]["planting_data_input"] = farm_management_inputs.get('planting_data_input', {}).get('value', None)
|
|
|
|
| 459 |
processed_data["input_text_pieces"]["yield_data_input"] = farm_management_inputs.get('yield_data_input', {}).get('value', None)
|
| 460 |
processed_data["input_text"] = "EMPTY"
|
| 461 |
elif processed_data["stepwise_json_creation"] == "singlejsoncreation":
|
| 462 |
+
print("IN THE SINGLE")
|
| 463 |
processed_data["input_text"] = data[0]['data']['onelonginputtext']['value']
|
| 464 |
processed_data["input_text_pieces"] = {}
|
| 465 |
processed_data["input_text_pieces"]["field_data_input"] = "EMPTY"
|
|
|
|
| 468 |
processed_data["input_text_pieces"]["soil_data_input"] = "EMPTY"
|
| 469 |
processed_data["input_text_pieces"]["yield_data_input"] = "EMPTY"
|
| 470 |
|
| 471 |
+
print("RETURNING DATA")
|
| 472 |
+
print(processed_data)
|
| 473 |
|
| 474 |
return processed_data
|