| import os |
| from pydantic import BaseModel, Field, validator, ValidationError |
| import gradio as gr |
| from openai import OpenAI |
| from typing import List, Dict, Any, Optional, Literal, Union |
| from enum import Enum |
| from gradio_toggle import Toggle |
| import json |
|
|
| from schema_classes import FarmActivities, Interactions, Trial, FarmActivitiesLite, PlantingLite, Log, Soil, Yield, InteractionsLite, TrialLite, Person, Treatment |
|
|
|
|
| |
| |
| |
# Propagate the API key into the environment for the OpenAI SDK.
# BUG FIX: the original `os.environ[...] = os.getenv(...)` raised an opaque
# TypeError when the variable was unset (os.environ values must be str);
# fail fast with an actionable message instead.
_api_key = os.getenv("OPENAI_API_KEY")
if _api_key is None:
    raise RuntimeError("OPENAI_API_KEY environment variable is not set")
os.environ["OPENAI_API_KEY"] = _api_key
client = OpenAI()
|
|
| |
|
|
| |
|
|
| |
| |
| |
| |
|
|
| |
|
|
| |
| |
|
|
| |
| |
|
|
| |
| |
|
|
| |
|
|
| |
| |
| |
| |
|
|
| |
|
|
| |
| |
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
def generate_json(processed_data):
    """
    Prompt the OpenAI API once per schema to generate structured JSON output.

    Args:
        processed_data: (dict) parsed survey data; reads
            processed_data["input_text"] = (str) the whole input text,
            processed_data["parameters"]["model_version"] = (str) model to use,
            processed_data["prompts"]["firstschemaprompt" / "secondschemaprompt"
            / "thirdschemaprompt"] = (str) per-schema system prompts.

    Returns:
        3 processed data-filled JSON strings:
        farm_pretty_json, interactions_pretty_json, trial_pretty_json
        (or a dict {"error": ...} on failure)
    """
    print("Generating JSON Whole!")
    input_text = processed_data["input_text"]
    model_version = processed_data["parameters"]["model_version"]
    prompts = processed_data["prompts"]

    # One (debug label, system prompt, response schema) triple per output JSON.
    sections = [
        ("FARM JSON: ", prompts["firstschemaprompt"], FarmActivities),
        ("INTERACTIONS JSON: ", prompts["secondschemaprompt"], Interactions),
        ("TRIALS JSON: ", prompts["thirdschemaprompt"], Trial),
    ]

    try:
        serialized = []
        for label, system_prompt, schema in sections:
            response = client.beta.chat.completions.parse(
                model=model_version,
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": input_text},
                ],
                response_format=schema,
            )
            parsed_model = response.choices[0].message.parsed
            print(label)
            print(parsed_model)
            serialized.append(parsed_model.json())
        return tuple(serialized)
    except ValidationError as e:
        return {"error": str(e)}
    except Exception as e:
        return {"error": "Failed to generate valid JSON. " + str(e)}
|
|
|
|
| |
def _parse_schema_section(model_version, system_prompt, user_text, schema, label):
    """
    Run one structured-output completion for a single schema section.

    Args:
        model_version: (str) model name to use
        system_prompt: (str) section-specific system prompt
        user_text: (str) input text for this section
        schema: pydantic model class used as the structured response_format
        label: (str) human-readable section name for the debug prints

    Returns:
        (dict) the parsed response converted with .dict()
    """
    print(label + " prompt")
    print(system_prompt)
    print(label + " data input")
    print(user_text)
    response = client.beta.chat.completions.parse(
        model=model_version,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_text}
        ],
        response_format=schema,
    )
    parsed_model = response.choices[0].message.parsed
    print(label.upper() + " JSON: ")
    result = parsed_model.dict()
    print(result)
    return result


def generate_json_pieces(processed_data):
    """
    Build the three output JSONs by calling the model once per schema section
    and stitching the per-section results together.

    This is primarily for one of the flippers, which allows each individual
    JSON section to be created individually, then concatenates them all
    together. It is proposed that perhaps the individual calls to the model
    will be more robust than giving the model all the data at once.

    Args:
        processed_data: (dict) parsed survey data; reads
            processed_data["parameters"]["model_version"] = (str) model to use,
            processed_data["parameters"]["pre_process"] = (str) "yes" to read
                inputs from input_text_pieces["pre_processed_pieces"],
            processed_data["input_text_pieces"] = (dict) per-section inputs,
            processed_data["prompts"] = (dict) per-section system prompts.

    Returns:
        3 JSON strings: farm activities, interactions, trials
        (or a dict {"error": ...} on failure)
    """
    print("Generating JSON Pieces!")
    print("INPUT DATA")
    print(processed_data)

    model_version = processed_data["parameters"]["model_version"]
    print("Model Version")
    print(model_version)

    # Choose raw vs pre-processed input pieces; both dicts use the same keys.
    pieces = processed_data["input_text_pieces"]
    if processed_data["parameters"]["pre_process"] == "yes":
        print("Pre prompt is true")
        pieces = pieces["pre_processed_pieces"]
    else:
        print("Pre prompt is false")

    print("Setting prompts")
    prompts = processed_data["prompts"]

    try:
        print("Getting all responses in pieces, starting with field response")

        # --- First schema: farm activities, assembled from five sections ---
        field_pretty_json = _parse_schema_section(
            model_version, prompts["first_schema_prompt_one"],
            pieces["field_data_input"], FarmActivitiesLite, "Field")
        plant_pretty_json = _parse_schema_section(
            model_version, prompts["first_schema_prompt_two"],
            pieces["planting_data_input"], PlantingLite, "Plant")
        log_pretty_json = _parse_schema_section(
            model_version, prompts["first_schema_prompt_three"],
            pieces["log_data_input"], Log, "Log")
        soil_pretty_json = _parse_schema_section(
            model_version, prompts["first_schema_prompt_four"],
            pieces["soil_data_input"], Soil, "Soil")
        yield_pretty_json = _parse_schema_section(
            model_version, prompts["first_schema_prompt_five"],
            pieces["yield_data_input"], Yield, "Yield")

        # Nest planting-related pieces to match the FarmActivities schema shape.
        plantings = {
            **plant_pretty_json,
            "logs": log_pretty_json,
            "soil": soil_pretty_json,
            "yield_": yield_pretty_json
        }
        farm_activities = {
            **field_pretty_json,
            "plantings": plantings
        }
        print("ADDED DICTS")
        print(farm_activities)
        print("FINAL JSON: ")
        final_pretty_farm_activity_json = json.dumps(farm_activities, indent=4)
        print(final_pretty_farm_activity_json)

        # --- Second schema: interactions + people ---
        interaction_pretty_json = _parse_schema_section(
            model_version, prompts["second_schema_prompt_one"],
            pieces["interaction_data_input"], InteractionsLite, "Interaction")
        person_pretty_json = _parse_schema_section(
            model_version, prompts["second_schema_prompt_two"],
            pieces["person_data_input"], Person, "Person")

        interactions = {
            **interaction_pretty_json,
            "people": person_pretty_json
        }
        print("ADDED DICTS 2")
        print(interactions)
        print("FINAL JSON: ")
        final_pretty_interactions_json = json.dumps(interactions, indent=4)
        print(final_pretty_interactions_json)

        # --- Third schema: trials + treatments ---
        trial_pretty_json = _parse_schema_section(
            model_version, prompts["third_schema_prompt_one"],
            pieces["trial_data_input"], TrialLite, "Trial")
        treatment_pretty_json = _parse_schema_section(
            model_version, prompts["third_schema_prompt_two"],
            pieces["treatment_data_input"], Treatment, "Treatment")

        trials = {
            **trial_pretty_json,
            "treatments": treatment_pretty_json
        }
        print("ADDED DICTS 3")
        print(trials)
        # BUG FIX: this label previously read "TREATMENT JSON: ", unlike the
        # matching "FINAL JSON: " labels for the other two schemas.
        print("FINAL JSON: ")
        final_pretty_trials_json = json.dumps(trials, indent=4)
        print(final_pretty_trials_json)

        return final_pretty_farm_activity_json, final_pretty_interactions_json, final_pretty_trials_json
    except Exception as e:
        return {"error": "Failed to generate valid JSON. " + str(e)}
| |
|
|
def pre_processing(processed_data):
    """
    In the event there's a pre-prompt, process the pre-prompts and input text
    accordingly, running each configured pre-processing prompt through the model.

    Args:
        processed_data: (dict) parsed survey data; reads
            processed_data["parameters"]["preprocessingprompt1".."3"] = (str|None)
                up to three pre-processing prompts, applied in order,
            processed_data["parameters"]["preprocessmodelversion"] = (str) model,
            and either processed_data["input_text_pieces"] (stepwise) or
            processed_data["input_text"] (big block).

    Returns:
        (dict) processed_data, with either
            processed_data["input_text_pieces"]["pre_processed_pieces"] filled
            (stepwise style) or processed_data["input_text"] replaced
            (big-block style).
    """
    print("Starting preprocessing")

    parameters = processed_data.get('parameters', {})
    pre_processing_list = [
        parameters.get('preprocessingprompt1', None),
        parameters.get('preprocessingprompt2', None),
        parameters.get('preprocessingprompt3', None),
    ]

    print("Preprocessing list")
    print(pre_processing_list)

    print("Model Version")
    model_version = processed_data["parameters"]["preprocessmodelversion"]
    print(model_version)

    def _run_prompts(text):
        """Apply each non-empty pre-prompt in order, chaining each response
        into the next prompt, and return the final text."""
        for pre_prompt in pre_processing_list:
            if not pre_prompt:
                continue
            try:
                print("Prompt followed by data entered")
                print(pre_prompt)
                print(text)
                response = client.chat.completions.create(
                    model=model_version,
                    messages=[
                        {"role": "system", "content": pre_prompt},
                        {"role": "user", "content": text}
                    ]
                )
                text = response.choices[0].message.content
                print("RESPONSE THAT WILL BE USED: ")
                print(text)
            except Exception as e:
                # Best-effort: keep the last good text if one prompt fails,
                # matching the original big-block branch's behavior.
                print(f"Failed to parse response as JSON. Error was: {e}")
        return text

    if processed_data["inputstyle"] == "individual-pieces-input-text":
        print("Stepwise Creation")
        processed_data["input_text_pieces"]["pre_processed_pieces"] = {}

        # Snapshot items() so inserting "pre_processed_pieces" can never
        # interfere with the iteration.
        for text_label, text_body in list(processed_data["input_text_pieces"].items()):
            if 'data_input' in text_label:
                print("Text Label")
                print(text_label)
                # BUG FIX: previously every prompt was run against the ORIGINAL
                # piece text, so with multiple prompts only the last response
                # survived; now responses are chained prompt-to-prompt, matching
                # the big-block branch. Also seeds the piece with the raw text
                # when no prompt is configured, so generate_json_pieces never
                # hits a missing key.
                processed_data["input_text_pieces"]["pre_processed_pieces"][text_label] = _run_prompts(text_body)
        return processed_data

    if processed_data["inputstyle"] == "big-block-input-text":
        processed_data["input_text"] = _run_prompts(processed_data["input_text"])
        return processed_data

    # Unknown input style: nothing to pre-process; return the data unchanged.
    return processed_data
| |
| |
def process_specifications(processed_data):
    """
    Once the parameters and data are processed, do the pre-processing and then
    generate JSONs.

    Args:
        processed_data: (dict) parsed survey data from parse_survey_stack

    Returns:
        3 processed data-filled JSON objects:
        farm_pretty_json, interactions_pretty_json, trial_pretty_json
    """
    print("Processing specifications")
    print("here is the processed data thus far")
    print(processed_data)

    def _maybe_pre_process(enabled_message, skipped_message):
        # Run the pre-processing pass only when the survey asked for it.
        if processed_data["parameters"]["pre_process"] == "yes":
            print(enabled_message)
            return pre_processing(processed_data)
        print(skipped_message)
        return processed_data

    input_style = processed_data["inputstyle"]

    if input_style == "individual-pieces-input-text":
        print("You are continuing with step-wise creation with your individual text pieces")
        prepared = _maybe_pre_process(
            "You are continuing with pre_prompt processing",
            "You have elsed into no pre-processing",
        )
        return generate_json_pieces(prepared)

    if input_style == "big-block-input-text":
        print("You are elifing into single json creation")
        prepared = _maybe_pre_process(
            "You are preprocessing now",
            "You do not have any preprocessing now",
        )
        return generate_json(prepared)
| |
def parse_survey_stack(data):
    """
    Parse the incoming data from the survey stack survey.

    Args:
        data: (json) JSON retrieved from surveystack API after retrieving
            survey info/details; the payload of interest is data[0]['data'].

    Returns:
        processed_data: (dict) with keys
            "inputstyle": (str) "individual-pieces-input-text" or
                "big-block-input-text"
            "input_text" / "input_text_pieces": the raw input text (whole
                block or per-section pieces, depending on input style)
            "prompts": (dict) schema prompts, keyed per input style
            "parameters": (dict) model / pre-processing settings. Values are
                exposed both under the raw survey keys ("modelversion",
                "preprocessdata") and the canonical keys that the downstream
                generators read ("model_version", "pre_process").
    """
    print("PROCESSING SURVEY STACK DATA")
    processed_data = {}

    print("JUST PRINTING OUT THE DATA FOR YA")
    print(data)

    processed_data["inputstyle"] = data[0]['data']['inputstyle']['value'][0]
    print("STEPWISE?: " + str(processed_data["inputstyle"]))

    processed_data["prompts"] = {}

    if processed_data["inputstyle"] == "individual-pieces-input-text":
        print("IN THE STEP")
        farm_management_inputs = data[0]['data']['input_data']['interview']
        print("FARM MANAGEMENT INPUTS" + str(farm_management_inputs))

        processed_data["input_text_pieces"] = {}
        processed_data["input_text_pieces"]["field_data_input"] = farm_management_inputs.get('field_data_input', {}).get('value', None)
        processed_data["input_text_pieces"]["planting_data_input"] = farm_management_inputs.get('planting_data_input', {}).get('value', None)
        processed_data["input_text_pieces"]["log_data_input"] = farm_management_inputs.get('log_data_input', {}).get('value', None)
        processed_data["input_text_pieces"]["soil_data_input"] = farm_management_inputs.get('soil_data_input', {}).get('value', None)
        processed_data["input_text_pieces"]["yield_data_input"] = farm_management_inputs.get('yield_data_input', {}).get('value', None)
        # Sentinel: the stepwise style carries no single big input text.
        processed_data["input_text"] = "EMPTY"

        print("NEXT SCHEMA INPUTS")
        interactions_inputs = data[0]['data']['summary']
        print("INTERACTIONS INPUTS" + str(interactions_inputs))
        processed_data["input_text_pieces"]["interaction_data_input"] = interactions_inputs.get('interaction_data_input', {}).get('value', None)
        processed_data["input_text_pieces"]["person_data_input"] = interactions_inputs.get('person_data_input', {}).get('value', None)

        print("NEXT SCHEMA INPUTS 2")
        trials_inputs = data[0]['data']['trial']
        print("TRIALS INPUTS" + str(trials_inputs))
        processed_data["input_text_pieces"]["trial_data_input"] = trials_inputs.get('trial_data_input', {}).get('value', None)
        processed_data["input_text_pieces"]["treatment_data_input"] = trials_inputs.get('treatment_data_input', {}).get('value', None)

        print("NOW ONTO THE PROMPTS")
        input_item = data[0]['data']['sections_prompt']
        print("PROMPTS INPUTS FOR PIECES" + str(input_item))
        processed_data["prompts"]["first_schema_prompt_one"] = input_item.get('firstschemaprompt1', {}).get('value')
        processed_data["prompts"]["first_schema_prompt_two"] = input_item.get('firstschemaprompt2', {}).get('value')
        processed_data["prompts"]["first_schema_prompt_three"] = input_item.get('firstschemaprompt3', {}).get('value')
        processed_data["prompts"]["first_schema_prompt_four"] = input_item.get('firstschemaprompt4', {}).get('value')
        processed_data["prompts"]["first_schema_prompt_five"] = input_item.get('firstschemaprompt5', {}).get('value')
        processed_data["prompts"]["second_schema_prompt_one"] = input_item.get('secondschemaprompt1', {}).get('value')
        processed_data["prompts"]["second_schema_prompt_two"] = input_item.get('secondschemaprompt2', {}).get('value')
        processed_data["prompts"]["third_schema_prompt_one"] = input_item.get('thirdschemaprompt1', {}).get('value')
        processed_data["prompts"]["third_schema_prompt_two"] = input_item.get('thirdschemaprompt2', {}).get('value')

    elif processed_data["inputstyle"] == "big-block-input-text":
        print("IN THE SINGLE")
        processed_data["input_text"] = data[0]['data']['onelonginputtext']['value']
        input_item = data[0]['data']['schema_prompt']
        print("INPUT TEXT SINGLE BLOCK" + str(processed_data["input_text"]))
        print("INPUT ITEM" + str(input_item))

        print("NOW ONTO THE PROMPTS from Input Item")
        processed_data["prompts"]["firstschemaprompt"] = input_item.get('firstschemaprompt', {}).get('value')
        processed_data["prompts"]["secondschemaprompt"] = input_item.get('secondschemaprompt', {}).get('value')
        processed_data["prompts"]["thirdschemaprompt"] = input_item.get('thirdschemaprompt', {}).get('value')

    parameter_data = data[0]['data']['parameters']
    print("PARAMETERS" + str(parameter_data))

    processed_data["parameters"] = {}
    processed_data["parameters"]["modelversion"] = parameter_data.get('modelversion', {}).get('value')[0]
    processed_data["parameters"]["preprocessdata"] = parameter_data.get('preprocessdata', {}).get('value')[0]
    processed_data["parameters"]["promptstyle"] = parameter_data.get('promptstyle', {}).get('value')

    # BUG FIX: downstream consumers (generate_json, generate_json_pieces,
    # process_specifications) read parameters["model_version"] and
    # parameters["pre_process"], which were never set here and raised KeyError.
    # Mirror the values under those canonical keys; the raw survey keys are
    # kept for backward compatibility.
    processed_data["parameters"]["model_version"] = processed_data["parameters"]["modelversion"]
    processed_data["parameters"]["pre_process"] = processed_data["parameters"]["preprocessdata"]

    if processed_data["parameters"]["preprocessdata"] == "yes":
        processed_data["parameters"]["preprocessmodelversion"] = parameter_data.get('preprocessmodelversion', {}).get('value')[0]
        processed_data["parameters"]["multiplepreprompts"] = parameter_data.get('multiplepreprompts', {}).get('value')
        processed_data["parameters"]["prepromptstyle"] = parameter_data.get('prepromptstyle', {}).get('value')
        processed_data["parameters"]["preprocessingprompt1"] = parameter_data.get('preprocessingprompt1', {}).get('value')
        processed_data["parameters"]["preprocessingprompt2"] = parameter_data.get('preprocessingprompt2', {}).get('value')
        processed_data["parameters"]["preprocessingprompt3"] = parameter_data.get('preprocessingprompt3', {}).get('value')

    print("RETURNING DATA")
    print(processed_data)

    return processed_data
|
|