File size: 12,744 Bytes
e34408d
 
 
 
 
 
 
 
 
9f31d16
 
e34408d
 
 
 
 
b5f8aca
0aa2def
e34408d
 
 
 
b3c2c53
0aa2def
b3c2c53
 
 
 
0aa2def
b3c2c53
 
 
 
024da2b
 
b3c2c53
e34408d
 
b3c2c53
e34408d
 
 
b3c2c53
 
e34408d
 
 
 
 
b3c2c53
 
e34408d
 
 
 
b3c2c53
e34408d
 
 
b3c2c53
 
e34408d
 
 
 
 
 
 
 
 
 
 
 
 
 
b3c2c53
 
e34408d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b3c2c53
e34408d
 
 
 
 
 
 
 
b3c2c53
e34408d
 
 
 
 
 
 
 
b3c2c53
e34408d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2e0db7d
 
e34408d
 
 
 
 
 
 
2e0db7d
e34408d
2e0db7d
e34408d
 
2e0db7d
 
b3c2c53
 
 
2585bcf
b3c2c53
 
2585bcf
be8ba1e
 
 
b3c2c53
 
be8ba1e
024da2b
be8ba1e
 
 
 
1c46c98
 
be8ba1e
024da2b
 
 
 
 
 
 
a3a6c15
4a9aba9
 
 
a3a6c15
 
 
2c24fde
b3c2c53
 
4a9aba9
b3c2c53
 
 
 
 
2585bcf
b3c2c53
 
 
 
 
bd6201c
b5f8aca
b3c2c53
0aa2def
 
b3c2c53
a69d150
bdf951a
0aa2def
bd6201c
 
9a6b654
8530295
9a6b654
 
14ebfe3
e2fbcf5
 
 
27da7d9
38c1c61
08592d7
e2fbcf5
ab95004
27da7d9
 
 
ab95004
 
 
 
27da7d9
 
ab95004
27da7d9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c5dfa16
27da7d9
 
 
 
 
 
0aa2def
b3c2c53
27da7d9
 
c5dfa16
8530295
9a6b654
8530295
9a6b654
 
 
bd6201c
8530295
 
2e0db7d
9a6b654
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
import os
from pydantic import BaseModel, Field, validator, ValidationError
import gradio as gr
from openai import OpenAI
from typing import List, Dict, Any, Optional, Literal, Union
from enum import Enum
from gradio_toggle import Toggle
import json

from schema_classes import FarmActivities, Interactions, Trial, FarmActivitiesLite, PlantingLite, Log, Soil, Yield

# --- Module-level OpenAI client setup ---
# NOTE(review): re-assigning OPENAI_API_KEY from os.getenv is a no-op when the
# variable is already set, and raises TypeError (os.environ values must be str,
# not None) when it is missing — confirm whether an explicit missing-key guard
# is wanted here instead.
os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")
client = OpenAI()  # shared client used by every generate_*/pre_processing call below


def generate_json(input_data, parameters):
    """
    Prompt the OpenAI structured-output API three times and return JSON for
    the farm, interactions, and trial portions of the input text.

    Args:
        input_data: dict with "input_text" (raw text to extract from) and
            "input_context" (context prefix string, or a falsy value for none).
        parameters: dict with at least "model_version" (an OpenAI model name
            that supports structured output).

    Returns:
        On success, a 3-tuple (farm_json, interactions_json, trial_json) of
        JSON strings. On failure, a 3-tuple whose elements are all the same
        {"error": ...} dict — previously a bare dict was returned, which broke
        callers that unpack three values.
    """
    input_text = input_data["input_text"]
    model_version = parameters["model_version"]

    # Optional caller-supplied context is prepended to every system prompt.
    context = input_data["input_context"] or ""

    def _extract(instruction, schema):
        # One structured-output call; returns the parsed pydantic model.
        response = client.beta.chat.completions.parse(
            model=model_version,
            messages=[
                {"role": "system", "content": context + instruction},
                {"role": "user", "content": input_text},
            ],
            response_format=schema,
        )
        return response.choices[0].message.parsed

    try:
        farm_generated_json = _extract("Extract the farm information.", FarmActivities)
        print("FARM JSON: ")
        print(farm_generated_json)  # debugging

        interactions_generated_json = _extract("Extract the interactions information.", Interactions)
        print("INTERACTIONS JSON: ")
        print(interactions_generated_json)  # debugging 2

        trial_generated_json = _extract("Extract the trial information.", Trial)
        print("TRIALS JSON: ")
        print(trial_generated_json)  # debugging 3

        # .json() is the pydantic-v1 serializer (used throughout this file);
        # switch to .model_dump_json() if the project moves to pydantic v2.
        return (
            farm_generated_json.json(),
            interactions_generated_json.json(),
            trial_generated_json.json(),
        )

    except ValidationError as e:
        error = {"error": str(e)}
        return error, error, error
    except Exception as e:
        error = {"error": "Failed to generate valid JSON. " + str(e)}
        return error, error, error

# This is for the step-wise JSON creation
def generate_json_pieces(specification, model_version, additional_json_creation_options, field_data_input, planting_data_input, logs_data_input, soil_data_input, yield_data_input):
    """
    Step-wise JSON creation: run five separate structured-output extractions
    (field, planting, log, soil, yield) and assemble them into one document.

    Args:
        specification: one big input text (used when parsing everything from it).
        model_version: OpenAI model name that supports structured output.
        additional_json_creation_options: "Explicit specific pieces" (use the
            five *_data_input texts) or "Parse from one big input text" (use
            `specification` for every piece).
        field_data_input .. yield_data_input: per-piece input texts.

    Returns:
        A pretty-printed JSON string on success, or an {"error": ...} dict.
    """
    if additional_json_creation_options == "Explicit specific pieces":
        sources = {
            "field": field_data_input,
            "planting": planting_data_input,
            "log": logs_data_input,
            "soil": soil_data_input,
            "yield": yield_data_input,
        }
    elif additional_json_creation_options == "Parse from one big input text":
        sources = {key: specification for key in ("field", "planting", "log", "soil", "yield")}
    else:
        # Previously an unknown option crashed inside the try with
        # UnboundLocalError; fail explicitly with the same error shape.
        return {"error": "Failed to generate valid JSON. Unknown JSON creation option: " + str(additional_json_creation_options)}

    def _extract(instruction, text, schema):
        # One structured-output call; returns the parsed pydantic model.
        response = client.beta.chat.completions.parse(
            model=model_version,
            messages=[
                {"role": "system", "content": instruction},
                {"role": "user", "content": text},
            ],
            response_format=schema,
        )
        return response.choices[0].message.parsed

    try:
        field_model = _extract("Extract the field information.", sources["field"], FarmActivitiesLite)
        planting_model = _extract("Extract the planting information.", sources["planting"], PlantingLite)
        log_model = _extract("Extract the log information.", sources["log"], Log)
        soil_model = _extract("Extract the soil information.", sources["soil"], Soil)
        yield_model = _extract("Extract the yield information.", sources["yield"], Yield)

        # Assemble plain dicts: the original assigned items directly on
        # pydantic models, which raises TypeError (models do not support item
        # assignment). .dict() is the pydantic-v1 serializer, matching the
        # .json() calls used elsewhere in this file.
        combined_json = field_model.dict()
        combined_json["plantings"] = planting_model.dict()
        combined_json["plantings"]["logs"] = log_model.dict()
        combined_json["plantings"]["soil"] = soil_model.dict()
        combined_json["plantings"]["yield"] = yield_model.dict()

        print(combined_json)  # debugging

        # A plain dict has no .json() method; serialize with the stdlib.
        # default=str covers non-JSON-native values (e.g. dates) pydantic may emit.
        pretty_json = json.dumps(combined_json, indent=2, default=str)

        return pretty_json
    except Exception as e:
        return {"error": "Failed to generate valid JSON. " + str(e)}
    
#def process_specifications(data, model_version, json_creation, additional_json_creation_options, field_data_input, planting_data_input, logs_data_input, soil_data_input, yield_data_input):
#    # This method just drives the process

    # Uncomment when working on flippers
    #if json_creation == "Single JSON Creation":
    #    resulting_schema = generate_json(data, model_version)
    #elif json_creation == "Step-wise JSON Creation":
    #    resulting_schema = generate_json_pieces(data, model_version, additional_json_creation_options, field_data_input, planting_data_input, logs_data_input, soil_data_input, yield_data_input) 
    #return resulting_schema 
#    global original_outputs, xml_outputs
    
#    output1, output2, output3 = generate_json(data, model_version)

    
#    return output1, output2, output3

def pre_processing(input_data, parameters):
    """
    Apply pre-prompts to the input text before structured-output extraction.

    With chaining enabled, each non-empty pre-prompt (context, summary,
    conversation, example — in that order) is sent to the chat API and its
    response becomes the input text for the next step. Without chaining, a
    single context prefix built from the combined prompt is attached instead.

    Args:
        input_data: dict with "input_text"; mutated in place and returned.
        parameters: dict with "chaining", "model_version", the four
            *_pre_prompt entries, and (when chaining is off) "combined_prompt".

    Returns:
        input_data, with "input_text" possibly rewritten and "input_context"
        set to either False (chained path) or a context prefix string.
    """
    if parameters["chaining"]:

        input_text = input_data["input_text"]
        pre_processing_list = [parameters["context_pre_prompt"], parameters["summary_pre_prompt"], parameters["conversation_pre_prompt"], parameters["example_pre_prompt"]]

        print("PreProcessingList")
        print(pre_processing_list)
        for pre_prompt in pre_processing_list:
            try:
                print("Pre-Processing: ")
                if pre_prompt:
                    print("Prompt: ")
                    print(pre_prompt)
                    print("Input Text: ")
                    print(input_text)
                    print("Model: ")
                    print(parameters["model_version"])

                    response = client.chat.completions.create(
                        model=parameters["model_version"],
                        messages=[
                            {"role": "system", "content": pre_prompt},
                            {"role": "user", "content": input_text}
                        ]
                    )

                    response_text = response.choices[0].message.content

                    print("Response Text: ")
                    print(response_text)

                    # Chain: this step's output feeds the next pre-prompt.
                    input_text = response_text

            except Exception as e:
                # Fixed misleading message: nothing here parses JSON — this
                # guards the chat-completion call itself. Best-effort: on
                # failure the previous input text is kept and chaining continues.
                print(f"Pre-processing step failed; keeping previous input text. Error was: {e}")

        # Chained runs already embed all context, so no prefix is added later.
        input_data["input_context"] = False
        input_data["input_text"] = input_text
        return input_data
    else:
        # Non-chained: attach the combined pre-prompt as a context prefix that
        # generate_json prepends to each extraction instruction.
        input_context = f"You are processing farm activity, interactions, and trial data. Here's important context of the data {parameters['combined_prompt']}. With this context in mind, "
        input_data["input_context"] = input_context
        return input_data
        

    
def process_specifications(input_data, parameters):
    """
    Drive the extraction pipeline: optionally run pre-prompt processing, then
    generate the three structured JSON outputs.

    Args:
        input_data: dict with "input_text" (and "input_context", set here or
            by pre_processing).
        parameters: parsed parameter dict from parse_survey_stack_parameters.

    Returns:
        Whatever generate_json returns: a 3-tuple of JSON outputs.
    """
    if parameters["pre_prompt"]:
        processed_input = pre_processing(input_data, parameters)
        return generate_json(processed_input, parameters)

    # No pre-prompt configured: extract from the raw text with no context
    # prefix. (Removed a redundant self-assignment of "input_text".)
    input_data["input_context"] = False
    return generate_json(input_data, parameters)
    
    
def parse_survey_stack_parameters(data):
    """
    Parse model and pre-prompt parameters out of a SurveyStack submission.

    Args:
        data: list of submissions; only data[0] is read. Expected shape:
            data[0]['data']['modelversion']['value'][0] -> model name, and
            data[0]['data']['group_2'] -> pre-prompt settings group.

    Returns:
        dict with keys model_version, pre_prompt, context_pre_prompt,
        summary_pre_prompt, conversation_pre_prompt, example_pre_prompt,
        chaining, and combined_prompt / combined_pre_prompt as applicable.
        Pre-prompt keys are always present, even when parsing fails.
    """
    # Safe defaults up front: previously an exception while reading group_2
    # left "pre_prompt"/"chaining" missing, and process_specifications then
    # crashed with KeyError.
    processed_data = {
        "pre_prompt": False,
        "context_pre_prompt": None,
        "summary_pre_prompt": None,
        "conversation_pre_prompt": None,
        "example_pre_prompt": None,
        "chaining": False,
        "combined_pre_prompt": None,
    }

    processed_data["model_version"] = data[0]['data']['modelversion']['value'][0]

    print("DATA: ")
    print(data)

    try:
        # (fixed typo: was pre_promp_parameters)
        pre_prompt_parameters = data[0]['data']['group_2']

        if pre_prompt_parameters['preprompt']['value'][0] == 'continue_preprompts':
            processed_data["pre_prompt"] = True

            # Accessing context and other prompts, with defaults in case they are None
            processed_data["context_pre_prompt"] = pre_prompt_parameters.get('contextpreprompt', {}).get('value', None)
            processed_data["summary_pre_prompt"] = pre_prompt_parameters.get('summarypreprompt', {}).get('value', None)
            processed_data["conversation_pre_prompt"] = pre_prompt_parameters.get('conversationpreprompt', {}).get('value', None)
            processed_data["example_pre_prompt"] = pre_prompt_parameters.get('examplepreprompt', {}).get('value', None)

            # Check if chaining is set to "yes" or "no"
            chaining_value = pre_prompt_parameters.get('prepromptchaining', {}).get('value', [None])[0]

            if chaining_value == "no":
                # Combine prompts into one prefix if chaining is "no"
                combined_prompt = " ".join(
                    filter(None, [
                        processed_data["context_pre_prompt"],
                        processed_data["summary_pre_prompt"],
                        processed_data["conversation_pre_prompt"],
                        processed_data["example_pre_prompt"]
                    ])
                )
                processed_data["chaining"] = False
                processed_data["combined_prompt"] = combined_prompt
            else:
                # Chaining enabled: prompts are applied one at a time instead.
                processed_data["chaining"] = True
                processed_data["combined_pre_prompt"] = None
        # else: pre-seeded defaults already cover the "no pre-prompt" case.

    except Exception as e:
        print(f"An error occurred: {e}")

    return processed_data

def parse_survey_stack_data(data):
    """Pull the raw input text out of a SurveyStack submission.

    Args:
        data: list of submissions; only the first entry is read.

    Returns:
        dict with a single "input_text" key holding the submitted text.
    """
    submission = data[0]['data']
    return {"input_text": submission['inputtext']['value']}