Update process_data.py
Browse files- process_data.py +103 -97
process_data.py
CHANGED
|
@@ -132,26 +132,19 @@ def generate_json_pieces(input_data, parameters):
|
|
| 132 |
"""
|
| 133 |
specification = input_data["input_text"]
|
| 134 |
model_version = parameters["model_version"]
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
elif additional_json_creation_options == "Parse from one big input text":
|
| 150 |
-
field_data_specification = specification
|
| 151 |
-
planting_data_specification = specification
|
| 152 |
-
logs_data_specification = specification
|
| 153 |
-
soil_data_specification = specification
|
| 154 |
-
yield_data_specification = specification
|
| 155 |
|
| 156 |
try:
|
| 157 |
# Call OpenAI API to generate structured output based on prompt
|
|
@@ -214,21 +207,6 @@ def generate_json_pieces(input_data, parameters):
|
|
| 214 |
except Exception as e:
|
| 215 |
return {"error": "Failed to generate valid JSON. " + str(e)}
|
| 216 |
|
| 217 |
-
#def process_specifications(data, model_version, json_creation, additional_json_creation_options, field_data_input, planting_data_input, logs_data_input, soil_data_input, yield_data_input):
|
| 218 |
-
# # This method just drives the process
|
| 219 |
-
|
| 220 |
-
# Uncomment when working on flippers
|
| 221 |
-
#if json_creation == "Single JSON Creation":
|
| 222 |
-
# resulting_schema = generate_json(data, model_version)
|
| 223 |
-
#elif json_creation == "Step-wise JSON Creation":
|
| 224 |
-
# resulting_schema = generate_json_pieces(data, model_version, additional_json_creation_options, field_data_input, planting_data_input, logs_data_input, soil_data_input, yield_data_input)
|
| 225 |
-
#return resulting_schema
|
| 226 |
-
# global original_outputs, xml_outputs
|
| 227 |
-
|
| 228 |
-
# output1, output2, output3 = generate_json(data, model_version)
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
# return output1, output2, output3
|
| 232 |
|
| 233 |
def pre_processing(input_data, parameters):
|
| 234 |
"""
|
|
@@ -252,51 +230,73 @@ def pre_processing(input_data, parameters):
|
|
| 252 |
input_data["input_context"] = (bool) whether the input text should be used as context or not
|
| 253 |
input_data["input_text"] = (str) input text
|
| 254 |
"""
|
| 255 |
-
|
| 256 |
-
|
| 257 |
-
|
| 258 |
-
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
|
| 262 |
-
|
| 263 |
-
for pre_prompt in pre_processing_list:
|
| 264 |
-
try:
|
| 265 |
-
print("Pre-Processing: ")
|
| 266 |
-
if pre_prompt:
|
| 267 |
-
print("Prompt: ")
|
| 268 |
-
print(pre_prompt)
|
| 269 |
-
print("Input Text: ")
|
| 270 |
-
print(input_text)
|
| 271 |
-
print("Model: ")
|
| 272 |
-
print(parameters["model_version"])
|
| 273 |
-
|
| 274 |
-
response = client.chat.completions.create(
|
| 275 |
-
model=parameters["model_version"],
|
| 276 |
-
messages=[
|
| 277 |
-
{"role": "system", "content": pre_prompt},
|
| 278 |
-
{"role": "user", "content": input_text}
|
| 279 |
-
]
|
| 280 |
-
)
|
| 281 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 282 |
|
| 283 |
-
|
| 284 |
-
|
| 285 |
-
|
| 286 |
-
|
| 287 |
-
|
| 288 |
-
|
| 289 |
-
|
| 290 |
-
|
| 291 |
-
|
| 292 |
-
|
| 293 |
-
input_data["input_context"] = False
|
| 294 |
-
input_data["input_text"] = input_text
|
| 295 |
-
return input_data
|
| 296 |
-
else:
|
| 297 |
-
input_context = f"You are processing farm activity, interactions, and trial data. Here's important context of the data {parameters['combined_prompt']}. With this context in mind, "
|
| 298 |
-
input_data["input_context"] = input_context
|
| 299 |
-
return input_data
|
| 300 |
|
| 301 |
|
| 302 |
|
|
@@ -322,13 +322,19 @@ def process_specifications(input_data, parameters):
|
|
| 322 |
3 processed data-filled JSON objects: farm_pretty_json, interactions_pretty_json, trial_pretty_json
|
| 323 |
"""
|
| 324 |
# here is where parsing and other things will happen before
|
| 325 |
-
if parameters["
|
| 326 |
-
|
| 327 |
-
|
| 328 |
-
|
|
|
|
|
|
|
|
|
|
| 329 |
input_data["input_context"] = False
|
| 330 |
-
|
| 331 |
-
|
|
|
|
|
|
|
|
|
|
| 332 |
|
| 333 |
|
| 334 |
def parse_survey_stack_parameters(data):
|
|
@@ -414,26 +420,26 @@ def parse_survey_stack_data(data):
|
|
| 414 |
"""
|
| 415 |
processed_data = {}
|
| 416 |
|
|
|
|
| 417 |
|
| 418 |
processed_data["stepwise_json_creation"] = data[0]['data']['stepwisejsoncreation']['value']
|
| 419 |
|
| 420 |
if processed_data["stepwise_json_creation"] == "stepwisejsoncreation":
|
| 421 |
-
processed_data["
|
| 422 |
-
processed_data["
|
| 423 |
-
processed_data["
|
| 424 |
-
processed_data["
|
| 425 |
-
processed_data["
|
| 426 |
-
processed_data["
|
| 427 |
processed_data["input_text"] = "EMPTY"
|
| 428 |
elif processed_data["stepwise_json_creation"] == "singlejsoncreation":
|
| 429 |
processed_data["input_text"] = data[0]['data']['onelonginputtext']['value']
|
| 430 |
-
processed_data["
|
| 431 |
-
processed_data["
|
| 432 |
-
processed_data["
|
| 433 |
-
processed_data["
|
| 434 |
-
processed_data["
|
| 435 |
-
processed_data["
|
| 436 |
|
| 437 |
|
| 438 |
return processed_data
|
| 439 |
-
|
|
|
|
| 132 |
"""
|
| 133 |
specification = input_data["input_text"]
|
| 134 |
model_version = parameters["model_version"]
|
| 135 |
+
|
| 136 |
+
if parameters["pre_prompt"] == True:
|
| 137 |
+
field_data_input = input_data["input_text_pieces"]["field_data_input"]
|
| 138 |
+
planting_data_input = input_data["input_text_pieces"]["planting_data_input"]
|
| 139 |
+
logs_data_input = input_data["input_text_pieces"]["logs_data_input"]
|
| 140 |
+
soil_data_input = input_data["input_text_pieces"]["soil_data_input"]
|
| 141 |
+
yield_data_input = input_data["input_text_pieces"]["yield_data_input"]
|
| 142 |
+
else:
|
| 143 |
+
field_data_input = input_data["input_text_pieces"]["pre_processed_pieces"]["field_data_input"]
|
| 144 |
+
planting_data_input = input_data["input_text_pieces"]["pre_processed_pieces"]["planting_data_input"]
|
| 145 |
+
logs_data_input = input_data["input_text_pieces"]["pre_processed_pieces"]["logs_data_input"]
|
| 146 |
+
soil_data_input = input_data["input_text_pieces"]["pre_processed_pieces"]["soil_data_input"]
|
| 147 |
+
yield_data_input = input_data["input_text_pieces"]["pre_processed_pieces"]["yield_data_input"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 148 |
|
| 149 |
try:
|
| 150 |
# Call OpenAI API to generate structured output based on prompt
|
|
|
|
| 207 |
except Exception as e:
|
| 208 |
return {"error": "Failed to generate valid JSON. " + str(e)}
|
| 209 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 210 |
|
| 211 |
def pre_processing(input_data, parameters):
|
| 212 |
"""
|
|
|
|
| 230 |
input_data["input_context"] = (bool) whether the input text should be used as context or not
|
| 231 |
input_data["input_text"] = (str) input text
|
| 232 |
"""
|
| 233 |
+
if parameters["stepwise_json_creation"] == "stepwisejsoncreation":
|
| 234 |
+
pre_processing_prompts = [parameters["context_pre_prompt"], parameters["summary_pre_prompt"], parameters["conversation_pre_prompt"], parameters["example_pre_prompt"]]
|
| 235 |
+
one_giant_preprompt = ""
|
| 236 |
+
input_data["input_text_pieces"]["pre_processed_pieces"] = {}
|
| 237 |
+
|
| 238 |
+
for pre_prompt in pre_processing_prompts:
|
| 239 |
+
if pre_prompt:
|
| 240 |
+
one_giant_preprompt += pre_prompt
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 241 |
|
| 242 |
+
for text_label, text_body in input_data["input_text_pieces"].items():
|
| 243 |
+
|
| 244 |
+
response = client.chat.completions.create(
|
| 245 |
+
model=parameters["model_version"],
|
| 246 |
+
messages=[
|
| 247 |
+
{"role": "system", "content": one_giant_preprompt},
|
| 248 |
+
{"role": "user", "content": text_body}
|
| 249 |
+
]
|
| 250 |
+
)
|
| 251 |
+
|
| 252 |
+
response_text = response.choices[0].message.content
|
| 253 |
+
input_data["input_text_pieces"]["pre_processed_pieces"][text_label] = response_text
|
| 254 |
+
|
| 255 |
+
|
| 256 |
+
if parameters["stepwise_json_creation"] == "singlejsoncreation":
|
| 257 |
+
if parameters["chaining"]:
|
| 258 |
+
|
| 259 |
+
input_text = input_data["input_text"]
|
| 260 |
+
pre_processing_list = [parameters["context_pre_prompt"], parameters["summary_pre_prompt"], parameters["conversation_pre_prompt"], parameters["example_pre_prompt"]]
|
| 261 |
+
|
| 262 |
+
print("PreProcessingList")
|
| 263 |
+
print(pre_processing_list)
|
| 264 |
+
for pre_prompt in pre_processing_list:
|
| 265 |
+
try:
|
| 266 |
+
print("Pre-Processing: ")
|
| 267 |
+
if pre_prompt:
|
| 268 |
+
print("Prompt: ")
|
| 269 |
+
print(pre_prompt)
|
| 270 |
+
print("Input Text: ")
|
| 271 |
+
print(input_text)
|
| 272 |
+
print("Model: ")
|
| 273 |
+
print(parameters["model_version"])
|
| 274 |
+
|
| 275 |
+
response = client.chat.completions.create(
|
| 276 |
+
model=parameters["model_version"],
|
| 277 |
+
messages=[
|
| 278 |
+
{"role": "system", "content": pre_prompt},
|
| 279 |
+
{"role": "user", "content": input_text}
|
| 280 |
+
]
|
| 281 |
+
)
|
| 282 |
+
|
| 283 |
+
response_text = response.choices[0].message.content
|
| 284 |
+
|
| 285 |
+
print("Response Text: ")
|
| 286 |
+
print(response_text)
|
| 287 |
+
|
| 288 |
+
input_text = response_text
|
| 289 |
|
| 290 |
+
except Exception as e:
|
| 291 |
+
print(f"Failed to parse response as JSON. Error was: {e}")
|
| 292 |
+
|
| 293 |
+
input_data["input_context"] = False
|
| 294 |
+
input_data["input_text"] = input_text
|
| 295 |
+
return input_data
|
| 296 |
+
else:
|
| 297 |
+
input_context = f"You are processing farm activity, interactions, and trial data. Here's important context of the data {parameters['combined_prompt']}. With this context in mind, "
|
| 298 |
+
input_data["input_context"] = input_context
|
| 299 |
+
return input_data
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 300 |
|
| 301 |
|
| 302 |
|
|
|
|
| 322 |
3 processed data-filled JSON objects: farm_pretty_json, interactions_pretty_json, trial_pretty_json
|
| 323 |
"""
|
| 324 |
# here is where parsing and other things will happen before
|
| 325 |
+
if parameters["stepwise_json_creation"] == "stepwisejsoncreation":
|
| 326 |
+
if parameters["pre_prompt"] == True:
|
| 327 |
+
processed_input = pre_processing(input_data, parameters)
|
| 328 |
+
else:
|
| 329 |
+
processed_input = input_data
|
| 330 |
+
return generate_json_pieces(processed_input, parameters)
|
| 331 |
+
elif parameters["stepwise_json_creation"] == "singlejsoncreation":
|
| 332 |
input_data["input_context"] = False
|
| 333 |
+
if parameters["pre_prompt"] == True:
|
| 334 |
+
processed_input = pre_processing(input_data, parameters)
|
| 335 |
+
else:
|
| 336 |
+
processed_input = input_data
|
| 337 |
+
return generate_json(processed_input, parameters)
|
| 338 |
|
| 339 |
|
| 340 |
def parse_survey_stack_parameters(data):
|
|
|
|
| 420 |
"""
|
| 421 |
processed_data = {}
|
| 422 |
|
| 423 |
+
farm_management_inputs = data[0]['data']['group_4']
|
| 424 |
|
| 425 |
processed_data["stepwise_json_creation"] = data[0]['data']['stepwisejsoncreation']['value']
|
| 426 |
|
| 427 |
if processed_data["stepwise_json_creation"] == "stepwisejsoncreation":
|
| 428 |
+
processed_data["input_text_pieces"] = {}
|
| 429 |
+
processed_data["input_text_pieces"]["field_data_input"] = farm_management_inputs.get('field_data_input', {}).get('value', None)
|
| 430 |
+
processed_data["input_text_pieces"]["planting_data_input"] = farm_management_inputs.get('planting_data_input', {}).get('value', None)
|
| 431 |
+
processed_data["input_text_pieces"]["log_data_input"] = farm_management_inputs.get('log_data_input', {}).get('value', None)
|
| 432 |
+
processed_data["input_text_pieces"]["soil_data_input"] = farm_management_inputs.get('soil_data_input', {}).get('value', None)
|
| 433 |
+
processed_data["input_text_pieces"]["yield_data_input"] = farm_management_inputs.get('yield_data_input', {}).get('value', None)
|
| 434 |
processed_data["input_text"] = "EMPTY"
|
| 435 |
elif processed_data["stepwise_json_creation"] == "singlejsoncreation":
|
| 436 |
processed_data["input_text"] = data[0]['data']['onelonginputtext']['value']
|
| 437 |
+
processed_data["input_text_pieces"] = {}
|
| 438 |
+
processed_data["input_text_pieces"]["field_data_input"] = "EMPTY"
|
| 439 |
+
processed_data["input_text_pieces"]["planting_data_input"] = "EMPTY"
|
| 440 |
+
processed_data["input_text_pieces"]["log_data_input"] = "EMPTY"
|
| 441 |
+
processed_data["input_text_pieces"]["soil_data_input"] = "EMPTY"
|
| 442 |
+
processed_data["input_text_pieces"]["yield_data_input"] = "EMPTY"
|
| 443 |
|
| 444 |
|
| 445 |
return processed_data
|
|
|