Spaces:

our-sci
/

experimental-trial_data-translation

Sleeping

File size: 21,346 Bytes

7744ffd
 
e0774f3
7744ffd
 
360052a
89523ef
6dbb429
de6114e
e0774f3
a525d9f
7744ffd
 
 
e0774f3
4fe308d
 
0c7d480
5a083d5
feb4046
aaeb0d7
263ab40
 
5a083d5
96d3248
 
 
 
 
feb4046
 
 
5a083d5
 
 
 
96d3248
5a083d5
 
360052a
f7278cf
 
 
 
 
360052a
f7278cf
 
 
360052a
f7278cf
 
 
 
 
 
360052a
5a083d5
360052a
 
 
 
 
 
 
6f97f98
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
360052a
 
6f97f98
 
 
 
 
 
 
 
 
 
 
 
a91b089
 
 
 
 
4be9a2e
360052a
 
ce95d69
4e09ab2
 
a91b089
 
 
bc25bd7
 
360052a
 
ce95d69
360052a
 
 
bc25bd7
 
e0774f3
85af614
360052a
 
 
ccf0c65
 
 
 
 
 
 
 
 
 
 
360052a
 
 
d278c68
7744ffd
 
 
e0774f3
7744ffd
460cfd4
 
 
 
 
 
 
 
 
 
 
 
06dde0f
460cfd4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
06dde0f
460cfd4
ec69e62
460cfd4
 
5a083d5
460cfd4
5a083d5
 
 
 
 
 
 
 
 
 
 
06dde0f
 
 
5a083d5
ec69e62
 
 
f7278cf
5a083d5
f7278cf
7e06873
e0774f3
7744ffd
 
 
 
e0774f3
ccf0c65
 
 
 
a4907a2
 
 
 
 
 
 
 
 
 
 
 
ccf0c65
a4907a2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ccf0c65
a4907a2
 
 
 
 
 
 
 
ccf0c65
a4907a2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ccf0c65
a4907a2
ccf0c65
a4907a2
 
 
 
ccf0c65
a4907a2
 
 
 
7744ffd
ccf0c65
7744ffd
9c22eb2
 
f1c6894
 
 
 
 
4fe308d
 
b37c269
0c7d480
4fe308d
0c7d480
b37c269
e0774f3
5f0757f
 
 
d35e6e6
 
 
5f0757f
 
 
ccf0c65
 
 
 
 
 
ae688e0
3189a9e
0a44ca5
 
39a4aa4
5f0757f
3ef13e8
e9d7866
67ce899
 
9bfa686
3ce9eeb
5f0757f
67ce899
c425f16
ccf0c65
 
 
a4907a2
ccf0c65
a4907a2
0f80413
6dbb429
4fe308d
0c7d480
6dbb429
 
 
0c7d480
 
 
 
 
ae688e0
0c7d480
 
 
6dbb429
0c7d480
89523ef
3ef13e8
a4907a2
89523ef
6dbb429
9c22eb2
5f0757f
 
 
ccf0c65
89523ef
5f0757f
e0774f3
0f80413
6dbb429
127a310
e0774f3
7744ffd

import os
from pydantic import BaseModel, Field, validator, ValidationError
import gradio as gr
from openai import OpenAI
from typing import List, Dict, Any, Optional, Literal, Union
from enum import Enum
from gradio_toggle import Toggle
from dicttoxml import dicttoxml
import json

# Chatbot model
# The OpenAI client picks up OPENAI_API_KEY from the environment on its own, so
# the key is not copied back into os.environ: that assignment was a no-op when
# the variable was set and raised an opaque TypeError when it was missing.
# Fail early with an actionable message instead.
if os.getenv("OPENAI_API_KEY") is None:
    raise RuntimeError(
        "The OPENAI_API_KEY environment variable is not set; "
        "it is required to create the OpenAI client."
    )
client = OpenAI()

# Module-level state shared by the Gradio callbacks below:
#   original_outputs - the JSON strings last produced for the three output boxes
#   xml_outputs      - their XML conversions, filled lazily (empty until computed)
original_outputs = []
xml_outputs = []

# These are the necessary components that make up the Trials 
#class Variables(BaseModel):
#    controlled: List[str] = Field(..., title="Controlled Variables", description="A list of controlled variables, which will be constant (controlled) across all trials")
#    independent: List[str] = Field(..., title="Independent Variables", description="A list of independent variables (ie treatments), which will be intentionally varied across one or more trials")
#    outcome: List[str] = Field(..., title="Outcome Variables", description="A list of outcome variables (ie dependent or response variables)")

class Treatment(BaseModel):
    # Schema for one treatment inside a trial. All four fields are required.
    # (A '#' comment is used instead of a docstring so the generated JSON
    # schema is not given an extra description.)
    name: str = Field(
        ...,
        title="Name",
        description="The treatment name",
    )
    description: str = Field(
        ...,
        title="Description",
        description="The treatment description, including the conditions within this treatment",
    )
    crops: List[str] = Field(
        ...,
        title="Crops",
        description="A list of crops being tested in this treatment",
    )
    fields: List[str] = Field(
        ...,
        title="Fields",
        description="A list of fields in which this treatment has occured or will occur",
    )
    # Currently disabled fields, kept for reference:
    #learnings: List[str] = Field(..., title="Learnings", description="A list of lessons learned from this experiment")
    #variables: Variables = Field(..., title="Variables", description="Variables (ie factors) in this experiment. Some variables are constant (controlled) and some will vary in order to learn something (independent)")
    #confoundingFactors: List[str] = Field(..., title="Confounding Factors", description="A list of factors which may impact the outcomes of the experiment that were not planned for")

class Trial(BaseModel):
    # Top-level schema for a trial: a name, a description and its treatments.
    name: str = Field(
        ...,
        title="Name",
        description="The name of this trial",
    )
    description: str = Field(
        ...,
        title="Description",
        description="A description of this trial",
    )
    treatments: List[Treatment] = Field(
        ...,
        title="Treatments",
        description="A list of different treatments (strips or blocks with the same conditions applied) performed by the partner",
    )

#################################################################################
# These are the necessary components that make up the Interactions
class Role(str, Enum):
    # Closed set of roles a Person can have; members are plain strings.
    PARTNER = "partner"
    STAFF = "staff"
    AGRONOMIST = "agronomist"
    OTHER = "other"
    
class Person(BaseModel):
    # A person involved in or mentioned during an interaction.
    name: str = Field(
        ...,
        title="Name",
        description="Name of this person",
    )
    role: Role = Field(
        ...,
        title="Role",
        description="Role of this person",
    )
    
class Interactions(BaseModel):
    # Schema describing one interaction: who took part, when it happened,
    # the follow-ups agreed, and a summary. Dates are carried as free-form strings.
    people: List[Person] = Field(
        ...,
        title="People",
        description="People involved or mentioned during interaction",
    )
    date: str = Field(
        ...,
        title="Date of current interaction",
        description="Date of the interaction",
    )
    nextMeeting: str = Field(
        ...,
        title="Date of next meeting",
        description="Proposed date of the next future interaction",
    )
    nextSteps: List[str] = Field(
        ...,
        title="Next Steps",
        description="List of individual next steps derived from the interaction",
    )
    summary: str = Field(
        ...,
        title="Summary",
        description="Summary of the interaction",
    )

#################################################################################
# These are the components for Farm Activities, Fields, and Plantings
class Status(str, Enum):
    # Lifecycle state of a planting; members are plain strings.
    ACTIVE = "active"
    ARCHIVED = "archived"

# Depending on how well this works, come back and hard-code this based on some parameter(s)
class Convention(str, Enum):
    # Allowed log conventions (the category/type of a Log entry).
    # Generic logs
    ACTIVITY = "log--activity"
    OBSERVATION = "log--observation"
    # Specific activities
    FLAMING = "log--activity--flaming"
    GRAZING = "log--activity--grazing"
    MOWING = "log--activity--mowing"
    SOLARIZATION = "log--activity--solarization"
    TERMINATION = "log--activity--termination"
    TILLAGE = "log--activity--tillage"
    HARVEST = "log--activity--harvest"
    # Inputs
    HERBICIDE = "log--input--herbicide_or_pesticide"
    IRRIGATION = "log--input--irrigation"
    LIME = "log--input--lime"
    ORGANIC = "log--input--organic_matter"
    SEEDTREAT = "log--input--seed_treatment"
    SEEDLINGTREAT = "log--input--seedling_treatment"
    # Lab tests, seeding and transplanting
    MODUS = "log--lab_test--modus_lab_test"
    SEEDING = "log--seeding--seeding"
    TRANSPLANT = "log--transplanting--transplant"

class Structure(str, Enum):
    # Soil texture classes accepted for Soil.structure; members are plain strings.
    CLAY = "clay"
    SANDYCLAY = "sandy clay"
    SILTYCLAY = "silty clay"
    SANDYCLAYLOAM = "sandy clay loam"
    # NOTE(review): member name looks like a typo for SILTYCLAYLOAM; it is kept
    # as-is because the name is part of the public interface.
    SILYCLAMLOAM = "silty clay loam"
    CLAYLOAM = "clay loam"
    SANDYLOAM = "sandy loam"
    SILTLOAM = "silt loam"
    LOAM = "loam"
    LOAMYSAND = "loamy sand"
    SAND = "sand"
    SILT = "silt"
  
class Log(BaseModel):
    # One farm log entry: its convention (type), when it happened, and details.
    convention: Convention = Field(
        ...,
        title="Logs",
        description="This log's convention (i.e. this log's category or type)",
    )
    date: str = Field(
        ...,
        title="Date",
        description="Date the log (i.e. action of the activity or input) was performed",
    )
    description: str = Field(
        ...,
        title="Description",
        description="A description of the details of the log (i.e. details about farm activity performed",
    )

class Soil(BaseModel):
    # Soil observations: free-text description, texture classes, and biology notes.
    description: str = Field(
        ...,
        title="Description",
        description="A general description of the soil",
    )
    structure: List[Structure] = Field(
        ...,
        title="Structure",
        description="The structure of the soil using options from the major soil texture classes (sand, clay, silt)",
    )
    biology: str = Field(
        ...,
        title="Biology",
        description="Biological activity levels of the soil, including fluffiness, worms and bugs, and other evidence of soil biological activity",
    )

class Yield(BaseModel):
    # Harvest observations for a planting: how much, and how good.
    quantity: str = Field(
        ...,
        title="Quantity",
        description="A description of the total yield (harvested amount) from this planting, including units when available",
    )
    quality: str = Field(
        ...,
        title="Quality",
        description="The product quality of the harvest.  For example, small or large fruits, sweet or tart flavor, easily molding or containing mold, high number of product seconds, etc.",
    )

# It breaks if soil and yield aren't lists for some reason
class Planting(BaseModel):
    # Full planting record: identity, status, crops/varieties, and the logs,
    # soil and yield data attached to it. Note that soil and yield_ are typed
    # as lists even though their descriptions ask for a single entry.
    name: str = Field(
        ...,
        title="Name",
        description="The name of the planting",
    )
    status: Status = Field(
        ...,
        title="Status",
        description="The status of the planting. \"active\" is a planting which is currently still in the field.  \"archived\" is a planting which is no longer in the field (has been terminated or harvested)",
    )
    crop: List[str] = Field(
        ...,
        title="Crop",
        description="A list of the crops in this planting",
    )
    variety: List[str] = Field(
        ...,
        title="Variety",
        description="A list of the crop varieties in this planting",
    )
    logs: List[Log] = Field(
        ...,
        title="Logs",
        description="A list of all the logs that are associated with the farm activities",
    )
    soil: List[Soil] = Field(
        ...,
        title="Soil",
        description="A single soil profile for this planting, containing only one soil description",
    )
    yield_: List[Yield] = Field(
        ...,
        title="Yield",
        description="One set of quantitative and qualitative yield observations for this planting",
    )

class FarmActivities(BaseModel):
    # Top-level schema for one agricultural field and every planting on it.
    name: str = Field(
        ...,
        title="Name",
        description="The name of the agricultural field.",
    )
    description: str = Field(
        ...,
        title="Description",
        description="The description of the agricultural field.",
    )
    plantings: List[Planting] = Field(
        ...,
        title="Plantings",
        description="All of the plantings which have occurred on this field.",
    )

# These are extra for the modular approach 
class FarmActivitiesLite(BaseModel):
    # Field-only variant of FarmActivities (no plantings), used by the
    # step-wise creation path.
    name: str = Field(
        ...,
        title="Name",
        description="The name of the agricultural field.",
    )
    description: str = Field(
        ...,
        title="Description",
        description="The description of the agricultural field.",
    )
    
class PlantingLite(BaseModel):
    # Planting variant without logs, soil or yield, used by the step-wise
    # creation path where those pieces are requested separately.
    name: str = Field(
        ...,
        title="Name",
        description="The name of the planting",
    )
    status: Status = Field(
        ...,
        title="Status",
        description="The status of the planting. \"active\" is a planting which is currently still in the field.  \"archived\" is a planting which is no longer in the field (has been terminated or harvested)",
    )
    crop: List[str] = Field(
        ...,
        title="Crop",
        description="A list of the crops in this planting",
    )
    variety: List[str] = Field(
        ...,
        title="Variety",
        description="A list of the crop varieties in this planting",
    )
    

# Single-pass JSON creation: one structured-output request per schema
def generate_json(specification, model_version):
    """
    Prompt the OpenAI API for structured output and return it as JSON strings.

    Three independent requests are made over the same input text, one each for
    the FarmActivities, Interactions and Trial schemas, in that order.

    Args:
        specification: Free text to extract the information from.
        model_version: Name of an OpenAI model that supports structured output.

    Returns:
        On success, a 3-tuple of JSON strings
        (farm activities, interactions, trial).
        On failure, a dict of the form {"error": "<message>"}; callers must
        check for this before unpacking.
    """
    try:
        farm_pretty_json = _request_structured_json(
            "Extract the farm information.",
            specification,
            model_version,
            FarmActivities,
            "FARM JSON: ",
        )
        interactions_pretty_json = _request_structured_json(
            "Extract the interactions information.",
            specification,
            model_version,
            Interactions,
            "INTERACTIONS JSON: ",
        )
        trial_pretty_json = _request_structured_json(
            "Extract the trial information.",
            specification,
            model_version,
            Trial,
            "TRIALS JSON: ",
        )

        return farm_pretty_json, interactions_pretty_json, trial_pretty_json

    except ValidationError as e:
        return {"error": str(e)}
    except Exception as e:
        # Top-level boundary for the UI: API failures raised by the client
        # and missing structured output both end up here.
        return {"error": "Failed to generate valid JSON. " + str(e)}


def _request_structured_json(system_prompt, specification, model_version, schema, debug_label):
    """Run one structured-output request and return the parsed result as a JSON string."""
    response = client.beta.chat.completions.parse(
        model=model_version,  # Use GPT model that supports structured output
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": specification}
        ],
        response_format=schema,
    )

    # The previous "'error' in response" checks were dropped: the response is
    # an object rather than a dict, so the membership test never matched, and
    # one of the checks referenced a variable before it was assigned.
    message = response.choices[0].message
    parsed = message.parsed
    if parsed is None:
        # No structured payload came back (for example the model refused).
        raise ValueError(
            f"Model returned no structured output: {getattr(message, 'refusal', None)}"
        )

    print(debug_label)
    print(parsed)  # debugging

    return parsed.json()

# This is for the step-wise JSON creation
def generate_json_pieces(specification, model_version, additional_json_creation_options, field_data_input, planting_data_input, logs_data_input, soil_data_input, yield_data_input):
    """
    Build the farm JSON piece by piece, with one request per sub-schema.

    Args:
        specification: The single big input text.
        model_version: Name of an OpenAI model that supports structured output.
        additional_json_creation_options: "Explicit specific pieces" to use the
            five dedicated inputs; any other value (including None) parses
            every piece from `specification`.
        field_data_input: Text describing the field.
        planting_data_input: Text describing the planting.
        logs_data_input: Text describing the log.
        soil_data_input: Text describing the soil.
        yield_data_input: Text describing the yield.

    Returns:
        On success, a JSON string holding the field data with a nested
        "plantings" object that itself contains "logs", "soil" and "yield".
        On failure, a dict of the form {"error": "<message>"}.
    """
    if additional_json_creation_options == "Explicit specific pieces":
        field_text = field_data_input
        planting_text = planting_data_input
        logs_text = logs_data_input
        soil_text = soil_data_input
        yield_text = yield_data_input
    else:
        # "Parse from one big input text", and also the fallback when no
        # option was chosen (previously this left the variables unbound).
        field_text = planting_text = logs_text = soil_text = yield_text = specification

    def _extract(system_prompt, user_text, schema):
        """Run one structured-output request and return the result as a plain dict."""
        response = client.beta.chat.completions.parse(
            model=model_version,  # Use GPT model that supports structured output
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_text}
            ],
            response_format=schema,
        )
        message = response.choices[0].message
        parsed = message.parsed
        if parsed is None:
            # No structured payload came back (for example the model refused).
            raise ValueError(
                f"Model returned no structured output: {getattr(message, 'refusal', None)}"
            )
        # Round-trip through JSON so the pieces are ordinary dicts that can be
        # nested with item assignment (the parsed models themselves cannot).
        return json.loads(parsed.json())

    try:
        combined_json = _extract("Extract the field information.", field_text, FarmActivitiesLite)
        planting = _extract("Extract the planting information.", planting_text, PlantingLite)
        # Each piece now gets a prompt naming what is being extracted; these
        # three previously all reused the planting prompt.
        planting["logs"] = _extract("Extract the log information.", logs_text, Log)
        planting["soil"] = _extract("Extract the soil information.", soil_text, Soil)
        planting["yield"] = _extract("Extract the yield information.", yield_text, Yield)

        # NOTE(review): this keeps the nesting the original code aimed for
        # (single objects, key "yield"); FarmActivities/Planting declare lists
        # and "yield_" - confirm which shape consumers expect.
        combined_json["plantings"] = planting

        print(combined_json) # debugging

        return json.dumps(combined_json)

    except ValidationError as e:
        return {"error": str(e)}
    except Exception as e:
        return {"error": "Failed to generate valid JSON. " + str(e)}
    
def process_specifications(data, model_version, json_creation, additional_json_creation_options, field_data_input, planting_data_input, logs_data_input, soil_data_input, yield_data_input):
    """
    Drive JSON generation for the UI and cache the result for the JSON/XML toggle.

    Only `data` and `model_version` are used at the moment; the remaining
    parameters belong to the step-wise path, which is disabled below.

    Returns:
        A 4-tuple: the three strings for the farm, interactions and trials
        output boxes, followed by a Toggle update. On failure all three boxes
        receive the same JSON-encoded error and the toggle is hidden.
    """
    # Uncomment when working on flippers
    #if json_creation == "Single JSON Creation":
    #    resulting_schema = generate_json(data, model_version)
    #elif json_creation == "Step-wise JSON Creation":
    #    resulting_schema = generate_json_pieces(data, model_version, additional_json_creation_options, field_data_input, planting_data_input, logs_data_input, soil_data_input, yield_data_input) 
    #return resulting_schema 
    global original_outputs, xml_outputs

    result = generate_json(data, model_version)

    if isinstance(result, dict):
        # generate_json reports failure as a single {"error": ...} dict, which
        # cannot be unpacked into three outputs; show the error in every box.
        error_text = json.dumps(result)
        outputs = [error_text, error_text, error_text]
        succeeded = False
    else:
        outputs = list(result)
        succeeded = True

    original_outputs = outputs
    xml_outputs = []  # invalidate XML cached from a previous run

    return outputs[0], outputs[1], outputs[2], Toggle(visible=succeeded)

# Build the Gradio UI: inputs first, then the three read-only output boxes.
with gr.Blocks() as demo: 
    # Main inputs: the free-text data and the model to run it through.
    data_input = gr.Textbox(label="Enter your data", placeholder="Type your data here")
    model_version_input = gr.Radio(["gpt-4o-2024-08-06", "gpt-4o-mini-2024-07-18"], label="Model Versions")

    # Hidden for demo purposes 
    json_creation_input = gr.Radio(["Single JSON Creation", "Step-wise JSON Creation"], label="Modularity of JSON Approach", visible=False)

    additional_json_creation_options = gr.Radio(["Parse from one big input text", "Explicit specific pieces"], label="Additional Step-wise JSON Options", visible=False)

    # Explicit Specific Pieces
    # One textbox per piece of the step-wise path; all start hidden.
    field_data_input = gr.Textbox(label="Enter your data for field", placeholder="Field Name and Description", visible=False)
    planting_data_input = gr.Textbox(label="Enter your data for plantings", placeholder="Name, Status (active/archived), Crop, Crop variety", visible=False)
    logs_data_input = gr.Textbox(label="Enter your log data", placeholder="Convention, Date, Description", visible=False)
    soil_data_input = gr.Textbox(label="Enter your soil data", placeholder="Description, Structure, Biology", visible=False)
    yield_data_input = gr.Textbox(label="Enter your yield data", placeholder="Quantity, Quality", visible=False)
    
    # Read-only result boxes, laid out side by side.
    with gr.Row():
        farm_output_box = gr.Textbox(label="Fields and Activities Output Data", interactive=False)
        interactions_output_box = gr.Textbox(label="Interactions Output Data", interactive=False)
        trials_output_box = gr.Textbox(label="Trials Output Data", interactive=False, info="Treatment learnings, variables (control, independent and outcome), and confounding factors are currently NOT included (as they break everything)")
    
    def update_visibility(radio, additional_options):
        # Returns six component updates: the additional-options radio followed
        # by the five explicit-piece textboxes, each carrying only a visibility.
        if radio == "Single JSON Creation":
            show_options, show_pieces = False, False
        elif radio == "Step-wise JSON Creation" and additional_options in (None, "Parse from one big input text"):
            show_options, show_pieces = True, False
        else:
            show_options, show_pieces = True, True

        piece_boxes = [gr.Textbox(visible=show_pieces) for _ in range(5)]
        return [gr.Radio(visible=show_options)] + piece_boxes
            
    def update_visibility2(radio):
        # Returns six textbox updates: the single big input first, then the
        # five explicit-piece inputs. Exactly one of the two groups is visible.
        explicit = radio == "Explicit specific pieces"
        big_input = gr.Textbox(visible=not explicit)
        piece_boxes = [gr.Textbox(visible=explicit) for _ in range(5)]
        return [big_input] + piece_boxes

    def update_toggle(toggle, farm_output_box, interactions_output_box, trials_output_box):
        """
        Switch the three output boxes between their JSON and XML forms.

        Args:
            toggle: True to show XML, False to show the original JSON.
            farm_output_box: Current text of the farm output box.
            interactions_output_box: Current text of the interactions output box.
            trials_output_box: Current text of the trials output box.

        Returns:
            Three strings, one per output box. If no conversion is possible
            (nothing generated yet, or the boxes do not hold JSON) the current
            box contents are returned unchanged.
        """
        global original_outputs, xml_outputs
        current = (farm_output_box, interactions_output_box, trials_output_box)

        if not toggle:
            # Back to JSON; fall back to the boxes if nothing was generated yet.
            if len(original_outputs) == 3:
                return original_outputs[0], original_outputs[1], original_outputs[2]
            return current

        if xml_outputs:
            # Already converted for the current results.
            return xml_outputs[0], xml_outputs[1], xml_outputs[2]

        try:
            parsed = [json.loads(text) for text in current]
        except (TypeError, ValueError):
            # Empty or non-JSON content: leave the boxes as they are.
            return current

        converted = []
        for item in parsed:
            xml = dicttoxml(item)
            # dicttoxml yields bytes; decode so the textbox shows XML text
            # instead of a b'...' representation.
            if isinstance(xml, bytes):
                xml = xml.decode("utf-8")
            converted.append(xml)

        xml_outputs = converted
        return converted[0], converted[1], converted[2]
        
    # Re-evaluate which step-wise controls are visible whenever either radio changes.
    json_creation_input.change(fn=update_visibility, inputs=[json_creation_input, additional_json_creation_options], outputs=[additional_json_creation_options, field_data_input, planting_data_input, logs_data_input, soil_data_input, yield_data_input])
    additional_json_creation_options.change(fn=update_visibility2, inputs=[additional_json_creation_options], outputs=[data_input, field_data_input, planting_data_input, logs_data_input, soil_data_input, yield_data_input])

    # JSON/XML switch; created hidden, its visibility is driven by the Toggle
    # update that process_specifications returns as its fourth output.
    toggle_output = Toggle(label="JSON <-> XML", value=False, info="Toggle Output Data", interactive=True, visible=False)
    
    # Generate button: runs process_specifications and fills the three output boxes.
    submit_button = gr.Button("Generate JSON")
    submit_button.click(
        fn=process_specifications,
        inputs=[data_input, model_version_input, json_creation_input, additional_json_creation_options, field_data_input, planting_data_input, logs_data_input, soil_data_input, yield_data_input],
        outputs=[farm_output_box, interactions_output_box, trials_output_box, toggle_output]
    )

    # Clears the input components only; the output boxes and the toggle are not in this list.
    clear_button = gr.ClearButton(components=[data_input, model_version_input, json_creation_input, additional_json_creation_options, field_data_input, planting_data_input, logs_data_input, soil_data_input, yield_data_input])
    # Swap the output boxes between JSON and XML when the toggle changes.
    toggle_output.change(fn=update_toggle, inputs=[toggle_output, farm_output_box, interactions_output_box, trials_output_box], outputs=[farm_output_box, interactions_output_box, trials_output_box])

# Start the Gradio app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()