# data-translation-experiments / process_data.py
# Last updated by rosemariafontana (commit bdf951a, verified).
import os
from pydantic import BaseModel, Field, validator, ValidationError
import gradio as gr
from openai import OpenAI
from typing import List, Dict, Any, Optional, Literal, Union
from enum import Enum
from gradio_toggle import Toggle
import json
from schema_classes import FarmActivities, Interactions, Trial, FarmActivitiesLite, PlantingLite, Log, Soil, Yield
# adding comment
# Chatbot model
# OpenAI() reads OPENAI_API_KEY from the environment on its own; re-exporting it
# (os.environ[...] = os.getenv(...)) was a no-op when set and raised an opaque
# TypeError when unset. Fail fast with a clear message instead.
if not os.getenv("OPENAI_API_KEY"):
    raise RuntimeError("OPENAI_API_KEY environment variable is not set.")
client = OpenAI()
def _extract_structured(system_prompt, input_text, model_version, schema, label):
    """Run one structured-output completion and return the parsed pydantic model.

    Args:
        system_prompt: System message steering the extraction.
        input_text: Raw user text to extract from.
        model_version: OpenAI model name (must support structured output).
        schema: Pydantic model class passed as ``response_format``.
        label: Tag used for the debug print (e.g. "FARM").
    """
    response = client.beta.chat.completions.parse(
        model=model_version,  # use a GPT model that supports structured output
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": input_text},
        ],
        response_format=schema,
    )
    parsed = response.choices[0].message.parsed
    print(f"{label} JSON: ")
    print(parsed)  # debugging
    return parsed


def generate_json(input_data, parameters):
    """
    Prompt the OpenAI API to generate structured JSON output for the farm,
    interactions, and trial schemas.

    Args:
        input_data: Dict with "input_text" (raw text) and "input_context"
            (prompt prefix string, or a falsy value for no prefix).
        parameters: Dict with at least "model_version".

    Returns:
        A 3-tuple of JSON strings (farm, interactions, trial) on success,
        or a ``{"error": ...}`` dict on failure.
    """
    input_text = input_data["input_text"]
    model_version = parameters["model_version"]
    # Optional context prefix prepended to every system prompt.
    context = input_data["input_context"] or ""
    sections = [
        ("FARM", "Extract the farm information.", FarmActivities),
        ("INTERACTIONS", "Extract the interactions information.", Interactions),
        ("TRIALS", "Extract the trial information.", Trial),
    ]
    try:
        outputs = []
        for label, prompt, schema in sections:
            parsed = _extract_structured(
                context + prompt, input_text, model_version, schema, label
            )
            outputs.append(parsed.json())
        return tuple(outputs)
    except ValidationError as e:
        return {"error": str(e)}
    except Exception as e:
        return {"error": "Failed to generate valid JSON. " + str(e)}
# This is for the step-wise JSON creation
# This is for the step-wise JSON creation
def generate_json_pieces(specification, model_version, additional_json_creation_options, field_data_input, planting_data_input, logs_data_input, soil_data_input, yield_data_input):
    """
    Step-wise JSON creation: extract each schema piece (field, planting, log,
    soil, yield) with a separate structured-output call, then merge them into
    one nested document.

    Args:
        specification: Single big input text (used when parsing everything
            from one text).
        model_version: OpenAI model name (must support structured output).
        additional_json_creation_options: Either "Explicit specific pieces"
            or "Parse from one big input text".
        field_data_input, planting_data_input, logs_data_input,
        soil_data_input, yield_data_input: Per-piece input texts used in
            "Explicit specific pieces" mode.

    Returns:
        A JSON string of the combined document, or an ``{"error": ...}`` dict.
    """
    piece_names = ("field", "planting", "log", "soil", "yield")
    if additional_json_creation_options == "Explicit specific pieces":
        specs = {
            "field": field_data_input,
            "planting": planting_data_input,
            "log": logs_data_input,
            "soil": soil_data_input,
            "yield": yield_data_input,
        }
    elif additional_json_creation_options == "Parse from one big input text":
        specs = {name: specification for name in piece_names}
    else:
        # BUG FIX: previously an unrecognized option fell through with the
        # per-piece specification locals undefined, raising NameError below.
        return {"error": "Unknown JSON creation option: " + str(additional_json_creation_options)}
    schemas = {
        "field": FarmActivitiesLite,
        "planting": PlantingLite,
        "log": Log,
        "soil": Soil,
        "yield": Yield,
    }
    try:
        parsed = {}
        for name in piece_names:
            # Call OpenAI API to generate structured output based on prompt.
            response = client.beta.chat.completions.parse(
                model=model_version,  # use a GPT model that supports structured output
                messages=[
                    {"role": "system", "content": f"Extract the {name} information."},
                    {"role": "user", "content": specs[name]},
                ],
                response_format=schemas[name],
            )
            parsed[name] = response.choices[0].message.parsed
        # BUG FIX: message.parsed is a pydantic model, which does not support
        # item assignment (the old code raised TypeError on every merge, and a
        # plain dict has no .json()).  Convert each piece to a dict first.
        combined = parsed["field"].dict()
        combined["plantings"] = parsed["planting"].dict()
        combined["plantings"]["logs"] = parsed["log"].dict()
        combined["plantings"]["soil"] = parsed["soil"].dict()
        combined["plantings"]["yield"] = parsed["yield"].dict()
        print(combined)  # debugging
        return json.dumps(combined)
    except Exception as e:
        return {"error": "Failed to generate valid JSON. " + str(e)}
#def process_specifications(data, model_version, json_creation, additional_json_creation_options, field_data_input, planting_data_input, logs_data_input, soil_data_input, yield_data_input):
# # This method just drives the process
# Uncomment when working on flippers
#if json_creation == "Single JSON Creation":
# resulting_schema = generate_json(data, model_version)
#elif json_creation == "Step-wise JSON Creation":
# resulting_schema = generate_json_pieces(data, model_version, additional_json_creation_options, field_data_input, planting_data_input, logs_data_input, soil_data_input, yield_data_input)
#return resulting_schema
# global original_outputs, xml_outputs
# output1, output2, output3 = generate_json(data, model_version)
# return output1, output2, output3
def pre_processing(input_data, parameters):
    """
    Run any pre-prompts against the input text before structured extraction.

    When ``parameters["chaining"]`` is truthy, each non-empty pre-prompt
    (context, summary, conversation, example) is applied in sequence, with
    each response becoming the input to the next step; ``input_context`` is
    then set to False.  Otherwise the combined pre-prompt is folded into an
    ``input_context`` prefix string for the extraction prompts.

    Args:
        input_data: Dict with "input_text"; mutated and returned.
        parameters: Dict with "chaining", "model_version", the four
            ``*_pre_prompt`` entries, and "combined_prompt".

    Returns:
        The (mutated) ``input_data`` dict.
    """
    if parameters["chaining"]:
        input_text = input_data["input_text"]
        pre_processing_list = [
            parameters["context_pre_prompt"],
            parameters["summary_pre_prompt"],
            parameters["conversation_pre_prompt"],
            parameters["example_pre_prompt"],
        ]
        for pre_prompt in pre_processing_list:
            # Skip pre-prompts that are None/empty.
            if not pre_prompt:
                continue
            try:
                # Keep the try body minimal: only the API call can raise here.
                response = client.chat.completions.create(
                    model=parameters["model_version"],
                    messages=[
                        {"role": "system", "content": pre_prompt},
                        {"role": "user", "content": input_text}
                    ]
                )
                # Chain: this step's output feeds the next pre-prompt.
                input_text = response.choices[0].message.content
            except Exception as e:
                # BUG FIX: the old message claimed a JSON parse failure, but no
                # JSON parsing happens here; report the real failure mode.
                print(f"Pre-processing completion failed; keeping previous text. Error was: {e}")
        input_data["input_context"] = False
        input_data["input_text"] = input_text
        return input_data
    else:
        input_context = f"You are processing farm activity, interactions, and trial data. Here's important context of the data {parameters['combined_prompt']}. With this context in mind, "
        input_data["input_context"] = input_context
        return input_data
def process_specifications(input_data, parameters):
    """
    Drive the pipeline: optionally run pre-processing, then generate the
    structured JSON outputs.

    Args:
        input_data: Dict with "input_text".
        parameters: Dict with "pre_prompt" (bool) and the keys required by
            ``pre_processing`` / ``generate_json``.

    Returns:
        Whatever ``generate_json`` returns (a 3-tuple of JSON strings, or an
        error dict).
    """
    if parameters["pre_prompt"]:
        input_data = pre_processing(input_data, parameters)
    else:
        # No pre-prompting: extraction prompts get no context prefix.
        input_data["input_context"] = False
    return generate_json(input_data, parameters)
def parse_survey_stack_parameters(data):
    """
    Flatten the Survey Stack parameter payload into a plain dict.

    Args:
        data: Survey Stack payload — a list whose first element has a 'data'
            dict with 'modelversion' and (optionally) a 'group_2' pre-prompt
            group.

    Returns:
        Dict with "model_version", "pre_prompt", the four ``*_pre_prompt``
        values, "chaining", and "combined_prompt" / "combined_pre_prompt".
        If the payload lacks 'group_2' (or is malformed), only
        "model_version" is guaranteed to be present.
    """
    processed_data = {}
    processed_data["model_version"] = data[0]['data']['modelversion']['value'][0]
    try:
        pre_prompt_parameters = data[0]['data']['group_2']
        if pre_prompt_parameters['preprompt']['value'][0] == 'continue_preprompts':
            processed_data["pre_prompt"] = True
            # Accessing context and other prompts, with defaults in case they are None.
            processed_data["context_pre_prompt"] = pre_prompt_parameters.get('contextpreprompt', {}).get('value', None)
            processed_data["summary_pre_prompt"] = pre_prompt_parameters.get('summarypreprompt', {}).get('value', None)
            processed_data["conversation_pre_prompt"] = pre_prompt_parameters.get('conversationpreprompt', {}).get('value', None)
            processed_data["example_pre_prompt"] = pre_prompt_parameters.get('examplepreprompt', {}).get('value', None)
            # Check if chaining is set to "yes" or "no".
            chaining_value = pre_prompt_parameters.get('prepromptchaining', {}).get('value', [None])[0]
            if chaining_value == "no":
                # Combine prompts if chaining is "no".
                combined_prompt = " ".join(
                    filter(None, [
                        processed_data["context_pre_prompt"],
                        processed_data["summary_pre_prompt"],
                        processed_data["conversation_pre_prompt"],
                        processed_data["example_pre_prompt"]
                    ])
                )
                processed_data["chaining"] = False
                processed_data["combined_prompt"] = combined_prompt
            else:
                # Chaining enabled: no combined prompt.
                processed_data["chaining"] = True
                processed_data["combined_pre_prompt"] = None
                # BUG FIX: pre_processing reads "combined_prompt", but the old
                # code only set "combined_pre_prompt" here; set both.
                processed_data["combined_prompt"] = None
        else:
            # Pre-prompting disabled: null out every pre-prompt field.
            processed_data["pre_prompt"] = False
            processed_data["context_pre_prompt"] = None
            processed_data["summary_pre_prompt"] = None
            processed_data["conversation_pre_prompt"] = None
            processed_data["example_pre_prompt"] = None
            processed_data["chaining"] = False
            processed_data["combined_pre_prompt"] = None
            processed_data["combined_prompt"] = None
    except Exception as e:
        # Best-effort: a malformed payload still yields the model version.
        print(f"An error occurred: {e}")
    return processed_data
def parse_survey_stack_data(data):
    """Pull the raw input text out of a Survey Stack payload.

    Args:
        data: Payload list whose first element holds a 'data' dict with an
            'inputtext' entry.

    Returns:
        Dict with a single "input_text" key.
    """
    payload = data[0]["data"]
    return {"input_text": payload["inputtext"]["value"]}