Spaces:

our-sci
/

experimental-trial_data-translation

Sleeping

App Files Files Community

experimental-trial_data-translation / app.py

rosemariafontana

Update app.py

39a4aa4 verified over 1 year ago

raw

history blame contribute delete

21.3 kB

	import os
	from pydantic import BaseModel, Field, validator, ValidationError
	import gradio as gr
	from openai import OpenAI
	from typing import List, Dict, Any, Optional, Literal, Union
	from enum import Enum
	from gradio_toggle import Toggle
	from dicttoxml import dicttoxml
	import json

	# Chatbot model
	# OpenAI() picks up OPENAI_API_KEY from the environment by itself.
	# The former self-assignment
	#     os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")
	# did nothing when the variable was set and raised
	# "TypeError: str expected, not NoneType" at import time when it was not,
	# hiding the real problem (a missing key). The client now reports that itself.
	client = OpenAI()

	# Module-level caches shared by the Gradio callbacks:
	#   original_outputs - the three JSON strings of the latest generation
	#   xml_outputs      - their XML renderings, filled lazily by the JSON <-> XML toggle
	# NOTE(review): these are process-wide globals, so concurrent sessions share them.
	original_outputs = []
	xml_outputs = []

	# These are the necessary components that make up the Trials
	#class Variables(BaseModel):
	# controlled: List[str] = Field(..., title="Controlled Variables", description="A list of controlled variables, which will be constant (controlled) across all trials")
	# independent: List[str] = Field(..., title="Independent Variables", description="A list of independent variables (ie treatments), which will be intentionally varied across one or more trials")
	# outcome: List[str] = Field(..., title="Outcome Variables", description="A list of outcome variables (ie dependent or response variables)")

	# Schema for a single treatment of a trial (element type of ``Trial.treatments``).
	# All four active fields are required.
	class Treatment(BaseModel):
	    # Name of the treatment
	    name: str = Field(
	        ...,
	        title="Name",
	        description="The treatment name",
	    )
	    # Free-text description, including the conditions applied
	    description: str = Field(
	        ...,
	        title="Description",
	        description="The treatment description, including the conditions within this treatment",
	    )
	    # Crops tested under this treatment
	    crops: List[str] = Field(
	        ...,
	        title="Crops",
	        description="A list of crops being tested in this treatment",
	    )
	    # Fields where the treatment takes place
	    fields: List[str] = Field(
	        ...,
	        title="Fields",
	        description="A list of fields in which this treatment has occured or will occur",
	    )
	    # Disabled fields, kept for reference:
	    #learnings: List[str] = Field(..., title="Learnings", description="A list of lessons learned from this experiment")
	    #variables: Variables = Field(..., title="Variables", description="Variables (ie factors) in this experiment. Some variables are constant (controlled) and some will vary in order to learn something (independent)")
	    #confoundingFactors: List[str] = Field(..., title="Confounding Factors", description="A list of factors which may impact the outcomes of the experiment that were not planned for")

	# Top-level schema for the "trial" extraction: a named, described trial
	# together with the list of treatments it consists of.
	class Trial(BaseModel):
	    # Name of the trial
	    name: str = Field(
	        ...,
	        title="Name",
	        description="The name of this trial",
	    )
	    # Free-text description of the trial
	    description: str = Field(
	        ...,
	        title="Description",
	        description="A description of this trial",
	    )
	    # Treatments performed as part of the trial
	    treatments: List[Treatment] = Field(
	        ...,
	        title="Treatments",
	        description="A list of different treatments (strips or blocks with the same conditions applied) performed by the partner",
	    )

	#################################################################################
	# These are the necessary components that make up the Interactions
	# Closed set of roles a person can have in an interaction.
	# Mixing in ``str`` makes every member compare equal to its plain string value.
	class Role(str, Enum):
	    PARTNER = "partner"
	    STAFF = "staff"
	    AGRONOMIST = "agronomist"
	    OTHER = "other"

	# A person involved in or mentioned during an interaction.
	class Person(BaseModel):
	    # The person's name
	    name: str = Field(
	        ...,
	        title="Name",
	        description="Name of this person",
	    )
	    # One of the Role enum values
	    role: Role = Field(
	        ...,
	        title="Role",
	        description="Role of this person",
	    )

	# Top-level schema for the "interactions" extraction: who took part, when it
	# happened, what comes next and a summary. Dates are carried as plain strings.
	class Interactions(BaseModel):
	    # People involved or mentioned
	    people: List[Person] = Field(
	        ...,
	        title="People",
	        description="People involved or mentioned during interaction",
	    )
	    # Date of this interaction
	    date: str = Field(
	        ...,
	        title="Date of current interaction",
	        description="Date of the interaction",
	    )
	    # Proposed date of the follow-up interaction
	    nextMeeting: str = Field(
	        ...,
	        title="Date of next meeting",
	        description="Proposed date of the next future interaction",
	    )
	    # Individual follow-up actions
	    nextSteps: List[str] = Field(
	        ...,
	        title="Next Steps",
	        description="List of individual next steps derived from the interaction",
	    )
	    # Summary of the interaction
	    summary: str = Field(
	        ...,
	        title="Summary",
	        description="Summary of the interaction",
	    )

	#################################################################################
	# These are the components for Farm Activities, Fields, and Plantings
	# Lifecycle status of a planting: still in the field ("active") or no longer
	# in the field ("archived").
	class Status(str, Enum):
	    ACTIVE = "active"
	    ARCHIVED = "archived"

	# Depending on how well this works, come back and hard-code this based on some parameter(s)
	# Category (type) of a log entry. Values are "--"-separated identifiers.
	class Convention(str, Enum):
	    # Generic logs
	    ACTIVITY = "log--activity"
	    OBSERVATION = "log--observation"

	    # Specific field activities
	    FLAMING = "log--activity--flaming"
	    GRAZING = "log--activity--grazing"
	    MOWING = "log--activity--mowing"
	    SOLARIZATION = "log--activity--solarization"
	    TERMINATION = "log--activity--termination"
	    TILLAGE = "log--activity--tillage"
	    HARVEST = "log--activity--harvest"

	    # Inputs applied
	    HERBICIDE = "log--input--herbicide_or_pesticide"
	    IRRIGATION = "log--input--irrigation"
	    LIME = "log--input--lime"
	    ORGANIC = "log--input--organic_matter"
	    SEEDTREAT = "log--input--seed_treatment"
	    SEEDLINGTREAT = "log--input--seedling_treatment"

	    # Lab tests, seeding and transplanting
	    MODUS = "log--lab_test--modus_lab_test"
	    SEEDING = "log--seeding--seeding"
	    TRANSPLANT = "log--transplanting--transplant"

	# Soil texture classes accepted for ``Soil.structure``.
	class Structure(str, Enum):
	    CLAY = "clay"
	    SANDYCLAY = "sandy clay"
	    SILTYCLAY = "silty clay"
	    SANDYCLAYLOAM = "sandy clay loam"
	    # NOTE(review): member name looks like a misspelling of SILTYCLAYLOAM; it is
	    # kept unchanged so existing references by name keep working. The value is correct.
	    SILYCLAMLOAM = "silty clay loam"
	    CLAYLOAM = "clay loam"
	    SANDYLOAM = "sandy loam"
	    SILTLOAM = "silt loam"
	    LOAM = "loam"
	    LOAMYSAND = "loamy sand"
	    SAND = "sand"
	    SILT = "silt"

	# One log entry (an activity or input) recorded for a planting.
	class Log(BaseModel):
	    # Category of the log, one of the Convention enum values
	    convention: Convention = Field(
	        ...,
	        title="Logs",
	        description="This log's convention (i.e. this log's category or type)",
	    )
	    # When the logged action was performed (plain string)
	    date: str = Field(
	        ...,
	        title="Date",
	        description="Date the log (i.e. action of the activity or input) was performed",
	    )
	    # Free-text details of the logged action
	    description: str = Field(
	        ...,
	        title="Description",
	        description="A description of the details of the log (i.e. details about farm activity performed",
	    )

	# Soil profile of a planting.
	class Soil(BaseModel):
	    # General free-text description
	    description: str = Field(
	        ...,
	        title="Description",
	        description="A general description of the soil",
	    )
	    # One or more texture classes from the Structure enum
	    structure: List[Structure] = Field(
	        ...,
	        title="Structure",
	        description="The structure of the soil using options from the major soil texture classes (sand, clay, silt)",
	    )
	    # Free-text notes on biological activity
	    biology: str = Field(
	        ...,
	        title="Biology",
	        description="Biological activity levels of the soil, including fluffiness, worms and bugs, and other evidence of soil biological activity",
	    )

	# Yield observations for a planting, both kept as free text.
	class Yield(BaseModel):
	    # Harvested amount, with units when available
	    quantity: str = Field(
	        ...,
	        title="Quantity",
	        description="A description of the total yield (harvested amount) from this planting, including units when available",
	    )
	    # Qualitative notes on the harvested product
	    quality: str = Field(
	        ...,
	        title="Quality",
	        description="The product quality of the harvest. For example, small or large fruits, sweet or tart flavor, easily molding or containing mold, high number of product seconds, etc.",
	    )

	# It breaks if soil and yield aren't lists for some reason
	# A planting on a field, with its logs, soil profile and yield.
	# ``soil`` and ``yield_`` are typed as lists although their descriptions ask
	# for a single entry each.
	class Planting(BaseModel):
	    # Name of the planting
	    name: str = Field(
	        ...,
	        title="Name",
	        description="The name of the planting",
	    )
	    # "active" or "archived"
	    status: Status = Field(
	        ...,
	        title="Status",
	        description="The status of the planting. \"active\" is a planting which is currently still in the field. \"archived\" is a planting which is no longer in the field (has been terminated or harvested)",
	    )
	    # Crops grown in this planting
	    crop: List[str] = Field(
	        ...,
	        title="Crop",
	        description="A list of the crops in this planting",
	    )
	    # Varieties of those crops
	    variety: List[str] = Field(
	        ...,
	        title="Variety",
	        description="A list of the crop varieties in this planting",
	    )
	    # Activity / input logs of this planting
	    logs: List[Log] = Field(
	        ...,
	        title="Logs",
	        description="A list of all the logs that are associated with the farm activities",
	    )
	    # Soil profile (list-typed, one entry expected)
	    soil: List[Soil] = Field(
	        ...,
	        title="Soil",
	        description="A single soil profile for this planting, containing only one soil description",
	    )
	    # Yield observations (list-typed, one entry expected); trailing underscore
	    # because ``yield`` is a Python keyword
	    yield_: List[Yield] = Field(
	        ...,
	        title="Yield",
	        description="One set of quantitative and qualitative yield observations for this planting",
	    )

	# Top-level schema for the "farm" extraction: one agricultural field and all
	# of the plantings that occurred on it.
	class FarmActivities(BaseModel):
	    # Name of the field
	    name: str = Field(
	        ...,
	        title="Name",
	        description="The name of the agricultural field.",
	    )
	    # Description of the field
	    description: str = Field(
	        ...,
	        title="Description",
	        description="The description of the agricultural field.",
	    )
	    # Plantings on this field
	    plantings: List[Planting] = Field(
	        ...,
	        title="Plantings",
	        description="All of the plantings which have occurred on this field.",
	    )

	# These are extra for the modular approach
	# Field-only variant of FarmActivities (no plantings), used as the response
	# format of the field request in the step-wise approach.
	class FarmActivitiesLite(BaseModel):
	    # Name of the field
	    name: str = Field(
	        ...,
	        title="Name",
	        description="The name of the agricultural field.",
	    )
	    # Description of the field
	    description: str = Field(
	        ...,
	        title="Description",
	        description="The description of the agricultural field.",
	    )

	# Variant of Planting without logs, soil and yield, used as the response
	# format of the planting request in the step-wise approach.
	class PlantingLite(BaseModel):
	    # Name of the planting
	    name: str = Field(
	        ...,
	        title="Name",
	        description="The name of the planting",
	    )
	    # "active" or "archived"
	    status: Status = Field(
	        ...,
	        title="Status",
	        description="The status of the planting. \"active\" is a planting which is currently still in the field. \"archived\" is a planting which is no longer in the field (has been terminated or harvested)",
	    )
	    # Crops grown in this planting
	    crop: List[str] = Field(
	        ...,
	        title="Crop",
	        description="A list of the crops in this planting",
	    )
	    # Varieties of those crops
	    variety: List[str] = Field(
	        ...,
	        title="Variety",
	        description="A list of the crop varieties in this planting",
	    )


	# Single-pass JSON creation: one structured-output request per top-level schema (farm, interactions, trial)
	def _request_structured_output(system_prompt, specification, model_version, response_format):
	    """Run one structured-output chat completion and return its parsed result.

	    Args:
	        system_prompt: Content of the system message.
	        specification: User-supplied text, sent as the user message.
	        model_version: Name of the OpenAI model to call.
	        response_format: Pydantic model class describing the expected output.

	    Returns:
	        The object found in ``choices[0].message.parsed``.

	    Raises:
	        ValueError: If the response carries no parsed object.
	    """
	    response = client.beta.chat.completions.parse(
	        model=model_version,  # Use GPT model that supports structured output
	        messages=[
	            {"role": "system", "content": system_prompt},
	            {"role": "user", "content": specification},
	        ],
	        response_format=response_format,
	    )
	    message = response.choices[0].message
	    if message.parsed is None:
	        # Without this guard the caller would fail later with an opaque
	        # "'NoneType' object has no attribute 'json'".
	        refusal = getattr(message, "refusal", None)
	        raise ValueError(f"No structured output returned (refusal: {refusal})")
	    return message.parsed


	def generate_json(specification, model_version):
	    """
	    Function to prompt OpenAI API to generate structured JSON output.

	    Three requests are made with the same input text, one per schema, in this
	    order: FarmActivities, Interactions, Trial. Each parsed result is printed
	    for debugging and serialised with ``.json()``.

	    Args:
	        specification: Free text to extract the information from.
	        model_version: Name of the OpenAI model to call.

	    Returns:
	        On success, a tuple ``(farm_json, interactions_json, trial_json)`` of
	        JSON strings. On any failure, a dict ``{"error": <message>}``.
	    """
	    # (debug label, system prompt, response schema) for each request, in call order
	    extraction_steps = (
	        ("FARM JSON: ", "Extract the farm information.", FarmActivities),
	        ("INTERACTIONS JSON: ", "Extract the interactions information.", Interactions),
	        ("TRIALS JSON: ", "Extract the trial information.", Trial),
	    )

	    try:
	        pretty_jsons = []
	        for debug_label, system_prompt, schema in extraction_steps:
	            # The former "if 'error' in response" checks were removed: API
	            # failures surface as exceptions (handled below), and the first
	            # check referenced interactions_response before it was assigned.
	            parsed = _request_structured_output(system_prompt, specification, model_version, schema)

	            print(debug_label)
	            print(parsed)  # debugging

	            pretty_jsons.append(parsed.json())

	        farm_pretty_json, interactions_pretty_json, trial_pretty_json = pretty_jsons
	        return farm_pretty_json, interactions_pretty_json, trial_pretty_json

	    except ValidationError as e:
	        return {"error": str(e)}
	    except Exception as e:
	        # Boundary handler: report any API / parsing failure to the caller
	        return {"error": "Failed to generate valid JSON. " + str(e)}

	# This is for the step-wise JSON creation
	def generate_json_pieces(specification, model_version, additional_json_creation_options, field_data_input, planting_data_input, logs_data_input, soil_data_input, yield_data_input):
	    """Build the farm JSON step by step, one structured-output request per piece.

	    Five requests are made (field, planting, logs, soil, yield) and their
	    results are nested into one document: the field at the top, the planting
	    under ``"plantings"``, and logs / soil / yield under that planting.

	    Args:
	        specification: Single input text, used for every piece when the option
	            is "Parse from one big input text".
	        model_version: Name of the OpenAI model to call.
	        additional_json_creation_options: Either "Explicit specific pieces"
	            or "Parse from one big input text".
	        field_data_input: Text for the field piece (explicit mode).
	        planting_data_input: Text for the planting piece (explicit mode).
	        logs_data_input: Text for the log piece (explicit mode).
	        soil_data_input: Text for the soil piece (explicit mode).
	        yield_data_input: Text for the yield piece (explicit mode).

	    Returns:
	        On success, the combined document as a JSON string. On any failure,
	        a dict ``{"error": <message>}``.
	    """
	    if additional_json_creation_options == "Explicit specific pieces":
	        field_data_specification = field_data_input
	        planting_data_specification = planting_data_input
	        logs_data_specification = logs_data_input
	        soil_data_specification = soil_data_input
	        yield_data_specification = yield_data_input
	    elif additional_json_creation_options == "Parse from one big input text":
	        field_data_specification = specification
	        planting_data_specification = specification
	        logs_data_specification = specification
	        soil_data_specification = specification
	        yield_data_specification = specification
	    else:
	        # Previously this case fell through with unbound locals and only
	        # surfaced as an UnboundLocalError message; report it explicitly.
	        return {"error": "Failed to generate valid JSON. Unknown step-wise option: " + repr(additional_json_creation_options)}

	    # (piece key, system prompt, input text, response schema), in call order.
	    # NOTE(review): the log, soil and yield requests reuse the planting prompt;
	    # this looks like a copy-paste leftover, but the prompts are kept unchanged
	    # here because altering them changes what the model is asked.
	    steps = (
	        ("field", "Extract the field information.", field_data_specification, FarmActivitiesLite),
	        ("planting", "Extract the planting information.", planting_data_specification, PlantingLite),
	        ("logs", "Extract the planting information.", logs_data_specification, Log),
	        ("soil", "Extract the planting information.", soil_data_specification, Soil),
	        ("yield", "Extract the planting information.", yield_data_specification, Yield),
	    )

	    try:
	        pieces = {}
	        for key, system_prompt, text, schema in steps:
	            # Call OpenAI API to generate structured output based on prompt
	            response = client.beta.chat.completions.parse(
	                model=model_version,  # Use GPT model that supports structured output
	                messages=[
	                    {"role": "system", "content": system_prompt},
	                    {"role": "user", "content": text},
	                ],
	                response_format=schema,
	            )
	            parsed = response.choices[0].message.parsed
	            if parsed is None:
	                raise ValueError(f"No structured output returned for the {key} piece")
	            # Round-trip through JSON to get plain dicts: the parsed objects are
	            # pydantic models, which do not support the item assignment below.
	            pieces[key] = json.loads(parsed.json())

	        combined_json = pieces["field"]
	        combined_json["plantings"] = pieces["planting"]
	        combined_json["plantings"]["logs"] = pieces["logs"]
	        combined_json["plantings"]["soil"] = pieces["soil"]
	        combined_json["plantings"]["yield"] = pieces["yield"]

	        print(combined_json)  # debugging

	        # The old trailing "if 'error' in response" check used an undefined name
	        # and made every call fail; API failures arrive as exceptions instead.
	        return json.dumps(combined_json)

	    except ValidationError as e:
	        return {"error": str(e)}
	    except Exception as e:
	        # Boundary handler: report any API / parsing failure to the caller
	        return {"error": "Failed to generate valid JSON. " + str(e)}

	def process_specifications(data, model_version, json_creation, additional_json_creation_options, field_data_input, planting_data_input, logs_data_input, soil_data_input, yield_data_input):
	    """Drive the generation for the submit button and refresh the output caches.

	    Only the single-pass path (``generate_json``) is active; the step-wise
	    parameters are accepted so the Gradio wiring stays stable but are unused.

	    Args:
	        data: Input text to extract from.
	        model_version: Name of the OpenAI model to call.
	        json_creation: Selected creation mode (currently unused).
	        additional_json_creation_options: Step-wise sub-option (currently unused).
	        field_data_input, planting_data_input, logs_data_input,
	        soil_data_input, yield_data_input: Step-wise inputs (currently unused).

	    Returns:
	        A 4-tuple: farm text, interactions text, trials text and a visible
	        ``Toggle``. When generation fails, all three texts hold the error
	        serialised as JSON.
	    """
	    # Uncomment when working on flippers
	    #if json_creation == "Single JSON Creation":
	    # resulting_schema = generate_json(data, model_version)
	    #elif json_creation == "Step-wise JSON Creation":
	    # resulting_schema = generate_json_pieces(data, model_version, additional_json_creation_options, field_data_input, planting_data_input, logs_data_input, soil_data_input, yield_data_input)
	    #return resulting_schema
	    global original_outputs, xml_outputs

	    result = generate_json(data, model_version)
	    if isinstance(result, dict):
	        # generate_json reports failures as {"error": ...}; unpacking that dict
	        # into three names raised "not enough values to unpack". Show the
	        # error in every output box instead.
	        error_text = json.dumps(result)
	        result = (error_text, error_text, error_text)

	    output1, output2, output3 = result
	    original_outputs = [output1, output2, output3]
	    xml_outputs = []  # invalidate XML cached from the previous generation

	    return output1, output2, output3, Toggle(visible=True)

	# Gradio UI: inputs, three output boxes, visibility callbacks and event wiring.
	# NOTE(review): the original indentation was lost; the Row is assumed to hold
	# only the three output boxes, with everything else directly under Blocks.
	with gr.Blocks() as demo:
	    data_input = gr.Textbox(label="Enter your data", placeholder="Type your data here")
	    model_version_input = gr.Radio(["gpt-4o-2024-08-06", "gpt-4o-mini-2024-07-18"], label="Model Versions")

	    # Hidden for demo purposes
	    json_creation_input = gr.Radio(["Single JSON Creation", "Step-wise JSON Creation"], label="Modularity of JSON Approach", visible=False)

	    additional_json_creation_options = gr.Radio(["Parse from one big input text", "Explicit specific pieces"], label="Additional Step-wise JSON Options", visible=False)

	    # Explicit Specific Pieces
	    field_data_input = gr.Textbox(label="Enter your data for field", placeholder="Field Name and Description", visible=False)
	    planting_data_input = gr.Textbox(label="Enter your data for plantings", placeholder="Name, Status (active/archived), Crop, Crop variety", visible=False)
	    logs_data_input = gr.Textbox(label="Enter your log data", placeholder="Convention, Date, Description", visible=False)
	    soil_data_input = gr.Textbox(label="Enter your soil data", placeholder="Description, Structure, Biology", visible=False)
	    yield_data_input = gr.Textbox(label="Enter your yield data", placeholder="Quantity, Quality", visible=False)

	    with gr.Row():
	        farm_output_box = gr.Textbox(label="Fields and Activities Output Data", interactive=False)
	        interactions_output_box = gr.Textbox(label="Interactions Output Data", interactive=False)
	        trials_output_box = gr.Textbox(label="Trials Output Data", interactive=False, info="Treatment learnings, variables (control, independent and outcome), and confounding factors are currently NOT included (as they break everything)")

	    def update_visibility(radio, additional_options):
	        """Return visibility updates for the step-wise option radio and the five piece boxes.

	        "Single JSON Creation" hides everything. "Step-wise JSON Creation" with
	        no sub-option or "Parse from one big input text" shows only the radio.
	        Any other combination shows the radio and all five piece boxes.
	        """
	        if radio == "Single JSON Creation":
	            show_options, show_pieces = False, False
	        elif radio == "Step-wise JSON Creation" and (additional_options is None or additional_options == "Parse from one big input text"):
	            show_options, show_pieces = True, False
	        else:
	            show_options, show_pieces = True, True
	        return [gr.Radio(visible=show_options)] + [gr.Textbox(visible=show_pieces) for _ in range(5)]

	    def update_visibility2(radio):
	        """Return visibility updates for the main input box and the five piece boxes.

	        "Explicit specific pieces" hides the main box and shows the piece boxes;
	        any other value does the opposite.
	        """
	        explicit = radio == "Explicit specific pieces"
	        return [gr.Textbox(visible=not explicit)] + [gr.Textbox(visible=explicit) for _ in range(5)]

	    def update_toggle(toggle, farm_text, interactions_text, trials_text):
	        """Switch the three output boxes between JSON (toggle off) and XML (toggle on).

	        The XML renderings are computed from the current box contents on first
	        use and cached in ``xml_outputs`` until the next generation clears it.
	        """
	        global original_outputs, xml_outputs
	        if not toggle:
	            if not original_outputs:
	                # Nothing generated yet: leave the boxes as they are rather
	                # than indexing into an empty cache.
	                return farm_text, interactions_text, trials_text
	            return original_outputs[0], original_outputs[1], original_outputs[2]

	        if not xml_outputs:
	            # dicttoxml returns bytes; decode so the text boxes show XML text
	            # and not a b'...' representation.
	            xml_outputs = [
	                dicttoxml(json.loads(text)).decode("utf-8")
	                for text in (farm_text, interactions_text, trials_text)
	            ]
	        return xml_outputs[0], xml_outputs[1], xml_outputs[2]

	    json_creation_input.change(fn=update_visibility, inputs=[json_creation_input, additional_json_creation_options], outputs=[additional_json_creation_options, field_data_input, planting_data_input, logs_data_input, soil_data_input, yield_data_input])
	    additional_json_creation_options.change(fn=update_visibility2, inputs=[additional_json_creation_options], outputs=[data_input, field_data_input, planting_data_input, logs_data_input, soil_data_input, yield_data_input])

	    # Hidden until process_specifications returns a visible Toggle
	    toggle_output = Toggle(label="JSON <-> XML", value=False, info="Toggle Output Data", interactive=True, visible=False)

	    submit_button = gr.Button("Generate JSON")
	    submit_button.click(
	        fn=process_specifications,
	        inputs=[data_input, model_version_input, json_creation_input, additional_json_creation_options, field_data_input, planting_data_input, logs_data_input, soil_data_input, yield_data_input],
	        outputs=[farm_output_box, interactions_output_box, trials_output_box, toggle_output],
	    )

	    # Clears the inputs only; the output boxes are not in this list
	    clear_button = gr.ClearButton(components=[data_input, model_version_input, json_creation_input, additional_json_creation_options, field_data_input, planting_data_input, logs_data_input, soil_data_input, yield_data_input])
	    toggle_output.change(fn=update_toggle, inputs=[toggle_output, farm_output_box, interactions_output_box, trials_output_box], outputs=[farm_output_box, interactions_output_box, trials_output_box])

	# Launch the Gradio app only when this file is run as a script, not on import
	if "__main__" == __name__:
	    demo.launch()