# data-translation-experiments / process_data.py
# Last updated by rosemariafontana (commit bdf951a, verified).
import os
from pydantic import BaseModel, Field, validator, ValidationError
import gradio as gr
from openai import OpenAI
from typing import List, Dict, Any, Optional, Literal, Union
from enum import Enum
from gradio_toggle import Toggle
import json
from schema_classes import FarmActivities, Interactions, Trial, FarmActivitiesLite, PlantingLite, Log, Soil, Yield
# adding comment
# Chatbot model
# OpenAI() reads OPENAI_API_KEY from the environment on its own; re-exporting it
# (os.environ[...] = os.getenv(...)) was a no-op when set and raised an opaque
# TypeError when unset. Fail fast with a clear message instead.
if not os.getenv("OPENAI_API_KEY"):
    raise RuntimeError("OPENAI_API_KEY environment variable is not set.")
client = OpenAI()
def _extract_structured(system_prompt, input_text, model_version, schema, label):
    """Run one structured-output completion and return the parsed pydantic model.

    Args:
        system_prompt: System message steering the extraction.
        input_text: Raw user text to extract from.
        model_version: OpenAI model name (must support structured output).
        schema: Pydantic model class passed as ``response_format``.
        label: Tag used for the debug print (e.g. "FARM").
    """
    response = client.beta.chat.completions.parse(
        model=model_version,  # use a GPT model that supports structured output
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": input_text},
        ],
        response_format=schema,
    )
    parsed = response.choices[0].message.parsed
    print(f"{label} JSON: ")
    print(parsed)  # debugging
    return parsed


def generate_json(input_data, parameters):
    """
    Prompt the OpenAI API to generate structured JSON output for the farm,
    interactions, and trial schemas.

    Args:
        input_data: Dict with "input_text" (raw text) and "input_context"
            (prompt prefix string, or a falsy value for no prefix).
        parameters: Dict with at least "model_version".

    Returns:
        A 3-tuple of JSON strings (farm, interactions, trial) on success,
        or a ``{"error": ...}`` dict on failure.
    """
    input_text = input_data["input_text"]
    model_version = parameters["model_version"]
    # Optional context prefix prepended to every system prompt.
    context = input_data["input_context"] or ""
    sections = [
        ("FARM", "Extract the farm information.", FarmActivities),
        ("INTERACTIONS", "Extract the interactions information.", Interactions),
        ("TRIALS", "Extract the trial information.", Trial),
    ]
    try:
        outputs = []
        for label, prompt, schema in sections:
            parsed = _extract_structured(
                context + prompt, input_text, model_version, schema, label
            )
            outputs.append(parsed.json())
        return tuple(outputs)
    except ValidationError as e:
        return {"error": str(e)}
    except Exception as e:
        return {"error": "Failed to generate valid JSON. " + str(e)}
# This is for the step-wise JSON creation
# This is for the step-wise JSON creation
def generate_json_pieces(specification, model_version, additional_json_creation_options, field_data_input, planting_data_input, logs_data_input, soil_data_input, yield_data_input):
    """
    Step-wise JSON creation: extract each schema piece (field, planting, log,
    soil, yield) with a separate structured-output call, then merge them into
    one nested document.

    Args:
        specification: Single big input text (used when parsing everything
            from one text).
        model_version: OpenAI model name (must support structured output).
        additional_json_creation_options: Either "Explicit specific pieces"
            or "Parse from one big input text".
        field_data_input, planting_data_input, logs_data_input,
        soil_data_input, yield_data_input: Per-piece input texts used in
            "Explicit specific pieces" mode.

    Returns:
        A JSON string of the combined document, or an ``{"error": ...}`` dict.
    """
    piece_names = ("field", "planting", "log", "soil", "yield")
    if additional_json_creation_options == "Explicit specific pieces":
        specs = {
            "field": field_data_input,
            "planting": planting_data_input,
            "log": logs_data_input,
            "soil": soil_data_input,
            "yield": yield_data_input,
        }
    elif additional_json_creation_options == "Parse from one big input text":
        specs = {name: specification for name in piece_names}
    else:
        # BUG FIX: previously an unrecognized option fell through with the
        # per-piece specification locals undefined, raising NameError below.
        return {"error": "Unknown JSON creation option: " + str(additional_json_creation_options)}
    schemas = {
        "field": FarmActivitiesLite,
        "planting": PlantingLite,
        "log": Log,
        "soil": Soil,
        "yield": Yield,
    }
    try:
        parsed = {}
        for name in piece_names:
            # Call OpenAI API to generate structured output based on prompt.
            response = client.beta.chat.completions.parse(
                model=model_version,  # use a GPT model that supports structured output
                messages=[
                    {"role": "system", "content": f"Extract the {name} information."},
                    {"role": "user", "content": specs[name]},
                ],
                response_format=schemas[name],
            )
            parsed[name] = response.choices[0].message.parsed
        # BUG FIX: message.parsed is a pydantic model, which does not support
        # item assignment (the old code raised TypeError on every merge, and a
        # plain dict has no .json()).  Convert each piece to a dict first.
        combined = parsed["field"].dict()
        combined["plantings"] = parsed["planting"].dict()
        combined["plantings"]["logs"] = parsed["log"].dict()
        combined["plantings"]["soil"] = parsed["soil"].dict()
        combined["plantings"]["yield"] = parsed["yield"].dict()
        print(combined)  # debugging
        return json.dumps(combined)
    except Exception as e:
        return {"error": "Failed to generate valid JSON. " + str(e)}
#def process_specifications(data, model_version, json_creation, additional_json_creation_options, field_data_input, planting_data_input, logs_data_input, soil_data_input, yield_data_input):
# # This method just drives the process
# Uncomment when working on flippers
#if json_creation == "Single JSON Creation":
# resulting_schema = generate_json(data, model_version)
#elif json_creation == "Step-wise JSON Creation":
# resulting_schema = generate_json_pieces(data, model_version, additional_json_creation_options, field_data_input, planting_data_input, logs_data_input, soil_data_input, yield_data_input)
#return resulting_schema
# global original_outputs, xml_outputs
# output1, output2, output3 = generate_json(data, model_version)
# return output1, output2, output3
def pre_processing(input_data, parameters):
    """
    Run any pre-prompts against the input text before structured extraction.

    When ``parameters["chaining"]`` is truthy, each non-empty pre-prompt
    (context, summary, conversation, example) is applied in sequence, with
    each response becoming the input to the next step; ``input_context`` is
    then set to False.  Otherwise the combined pre-prompt is folded into an
    ``input_context`` prefix string for the extraction prompts.

    Args:
        input_data: Dict with "input_text"; mutated and returned.
        parameters: Dict with "chaining", "model_version", the four
            ``*_pre_prompt`` entries, and "combined_prompt".

    Returns:
        The (mutated) ``input_data`` dict.
    """
    if parameters["chaining"]:
        input_text = input_data["input_text"]
        pre_processing_list = [
            parameters["context_pre_prompt"],
            parameters["summary_pre_prompt"],
            parameters["conversation_pre_prompt"],
            parameters["example_pre_prompt"],
        ]
        for pre_prompt in pre_processing_list:
            # Skip pre-prompts that are None/empty.
            if not pre_prompt:
                continue
            try:
                # Keep the try body minimal: only the API call can raise here.
                response = client.chat.completions.create(
                    model=parameters["model_version"],
                    messages=[
                        {"role": "system", "content": pre_prompt},
                        {"role": "user", "content": input_text}
                    ]
                )
                # Chain: this step's output feeds the next pre-prompt.
                input_text = response.choices[0].message.content
            except Exception as e:
                # BUG FIX: the old message claimed a JSON parse failure, but no
                # JSON parsing happens here; report the real failure mode.
                print(f"Pre-processing completion failed; keeping previous text. Error was: {e}")
        input_data["input_context"] = False
        input_data["input_text"] = input_text
        return input_data
    else:
        input_context = f"You are processing farm activity, interactions, and trial data. Here's important context of the data {parameters['combined_prompt']}. With this context in mind, "
        input_data["input_context"] = input_context
        return input_data
def process_specifications(input_data, parameters):
    """
    Drive the pipeline: optionally run pre-processing, then generate the
    structured JSON outputs.

    Args:
        input_data: Dict with "input_text".
        parameters: Dict with "pre_prompt" (bool) and the keys required by
            ``pre_processing`` / ``generate_json``.

    Returns:
        Whatever ``generate_json`` returns (a 3-tuple of JSON strings, or an
        error dict).
    """
    if parameters["pre_prompt"]:
        input_data = pre_processing(input_data, parameters)
    else:
        # No pre-prompting: extraction prompts get no context prefix.
        input_data["input_context"] = False
    return generate_json(input_data, parameters)
def parse_survey_stack_parameters(data):
    """
    Flatten the Survey Stack parameter payload into a plain dict.

    Args:
        data: Survey Stack payload — a list whose first element has a 'data'
            dict with 'modelversion' and (optionally) a 'group_2' pre-prompt
            group.

    Returns:
        Dict with "model_version", "pre_prompt", the four ``*_pre_prompt``
        values, "chaining", and "combined_prompt" / "combined_pre_prompt".
        If the payload lacks 'group_2' (or is malformed), only
        "model_version" is guaranteed to be present.
    """
    processed_data = {}
    processed_data["model_version"] = data[0]['data']['modelversion']['value'][0]
    try:
        pre_prompt_parameters = data[0]['data']['group_2']
        if pre_prompt_parameters['preprompt']['value'][0] == 'continue_preprompts':
            processed_data["pre_prompt"] = True
            # Accessing context and other prompts, with defaults in case they are None.
            processed_data["context_pre_prompt"] = pre_prompt_parameters.get('contextpreprompt', {}).get('value', None)
            processed_data["summary_pre_prompt"] = pre_prompt_parameters.get('summarypreprompt', {}).get('value', None)
            processed_data["conversation_pre_prompt"] = pre_prompt_parameters.get('conversationpreprompt', {}).get('value', None)
            processed_data["example_pre_prompt"] = pre_prompt_parameters.get('examplepreprompt', {}).get('value', None)
            # Check if chaining is set to "yes" or "no".
            chaining_value = pre_prompt_parameters.get('prepromptchaining', {}).get('value', [None])[0]
            if chaining_value == "no":
                # Combine prompts if chaining is "no".
                combined_prompt = " ".join(
                    filter(None, [
                        processed_data["context_pre_prompt"],
                        processed_data["summary_pre_prompt"],
                        processed_data["conversation_pre_prompt"],
                        processed_data["example_pre_prompt"]
                    ])
                )
                processed_data["chaining"] = False
                processed_data["combined_prompt"] = combined_prompt
            else:
                # Chaining enabled: no combined prompt.
                processed_data["chaining"] = True
                processed_data["combined_pre_prompt"] = None
                # BUG FIX: pre_processing reads "combined_prompt", but the old
                # code only set "combined_pre_prompt" here; set both.
                processed_data["combined_prompt"] = None
        else:
            # Pre-prompting disabled: null out every pre-prompt field.
            processed_data["pre_prompt"] = False
            processed_data["context_pre_prompt"] = None
            processed_data["summary_pre_prompt"] = None
            processed_data["conversation_pre_prompt"] = None
            processed_data["example_pre_prompt"] = None
            processed_data["chaining"] = False
            processed_data["combined_pre_prompt"] = None
            processed_data["combined_prompt"] = None
    except Exception as e:
        # Best-effort: a malformed payload still yields the model version.
        print(f"An error occurred: {e}")
    return processed_data
def parse_survey_stack_data(data):
    """Pull the raw input text out of a Survey Stack payload.

    Args:
        data: Payload list whose first element holds a 'data' dict with an
            'inputtext' entry.

    Returns:
        Dict with a single "input_text" key.
    """
    payload = data[0]["data"]
    return {"input_text": payload["inputtext"]["value"]}