Spaces:

our-sci
/

data-translation-experiments

Sleeping

App Files Files Community

rosemariafontana committed on Oct 28, 2024

Commit

e34408d

verified ·

1 Parent(s): fa532ba

Create process_data.py

Browse files

Files changed (1) hide show

process_data.py +186 -0

process_data.py ADDED Viewed

	@@ -0,0 +1,186 @@

+import os
+from pydantic import BaseModel, Field, validator, ValidationError
+import gradio as gr
+from openai import OpenAI
+from typing import List, Dict, Any, Optional, Literal, Union
+from enum import Enum
+from gradio_toggle import Toggle
+from dicttoxml import dicttoxml
+import json
+# adding comment
+# Chatbot model
+os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")
+client = OpenAI()
+# This is to make stuff happen
+def generate_json(input_data, model_version):
+    """
+    Function to prompt OpenAI API to generate structured JSON output.
+    """
+    try:
+        #Call OpenAI API to generate structured output based on prompt
+        farm_info_response = client.beta.chat.completions.parse(
+            model=model_version,  # Use GPT model that supports structured output
+            messages=[
+                {"role": "system", "content": "Extract the farm information."},
+                {"role": "user", "content": input_data}
+            ],
+            response_format=FarmActivities,
+        )
+        if 'error' in farm_info_response:
+            raise ValueError(f"API error: {interactions_response['error']['message']}")
+        farm_generated_json = farm_info_response.choices[0].message.parsed
+        print("FARM JSON: ")
+        print(farm_generated_json) # debugging
+        farm_pretty_json = farm_generated_json.json()
+        interactions_response = client.beta.chat.completions.parse(
+            model=model_version,  # Use GPT model that supports structured output
+            messages=[
+                {"role": "system", "content": "Extract the interactions information."},
+                {"role": "user", "content": specification}
+            ],
+            response_format=Interactions,
+        )
+        if 'error' in interactions_response:
+            raise ValueError(f"API error: {interactions_response['error']['message']}")
+        interactions_generated_json = interactions_response.choices[0].message.parsed
+        print("INTERACTIONS JSON: ")
+        print(interactions_generated_json) # debugging 2
+        interactions_pretty_json = interactions_generated_json.json()
+        trial_response = client.beta.chat.completions.parse(
+            model=model_version,  # Use GPT model that supports structured output
+            messages=[
+                {"role": "system", "content": "Extract the trial information."},
+                {"role": "user", "content": specification}
+            ],
+            response_format=Trial,
+        )
+        if 'error' in trial_response:
+            raise ValueError(f"API error: {trial_response['error']['message']}")
+        trial_generated_json = trial_response.choices[0].message.parsed
+        print("TRIALS JSON: ")
+        print(trial_generated_json) # debugging 3
+        trial_pretty_json = trial_generated_json.json()
+        return farm_pretty_json, interactions_pretty_json, trial_pretty_json
+    except ValidationError as e:
+        return {"error": str(e)}
+    except Exception as e:
+        return {"error": "Failed to generate valid JSON. " + str(e)}
+# This is for the step-wise JSON creation
+def generate_json_pieces(specification, model_version, additional_json_creation_options, field_data_input, planting_data_input, logs_data_input, soil_data_input, yield_data_input):
+    if additional_json_creation_options == "Explicit specific pieces":
+        field_data_specification = field_data_input
+        planting_data_specification = planting_data_input
+        logs_data_specification = logs_data_input
+        soil_data_specification = soil_data_input
+        yield_data_specification = yield_data_input
+    elif additional_json_creation_options == "Parse from one big input text":
+        field_data_specification = specification
+        planting_data_specification = specification
+        logs_data_specification = specification
+        soil_data_specification = specification
+        yield_data_specification = specification
+    try:
+        # Call OpenAI API to generate structured output based on prompt
+        field_response = client.beta.chat.completions.parse(
+            model=model_version,  # Use GPT model that supports structured output
+            messages=[
+                {"role": "system", "content": "Extract the field information."},
+                {"role": "user", "content": field_data_specification}
+            ],
+            response_format=FarmActivitiesLite,
+        )
+        plant_response = client.beta.chat.completions.parse(
+            model=model_version,  # Use GPT model that supports structured output
+            messages=[
+                {"role": "system", "content": "Extract the planting information."},
+                {"role": "user", "content": planting_data_specification}
+            ],
+            response_format=PlantingLite,
+        )
+        log_response = client.beta.chat.completions.parse(
+            model=model_version,  # Use GPT model that supports structured output
+            messages=[
+                {"role": "system", "content": "Extract the planting information."},
+                {"role": "user", "content": logs_data_specification}
+            ],
+            response_format=Log,
+        )
+        soil_response = client.beta.chat.completions.parse(
+            model=model_version,  # Use GPT model that supports structured output
+            messages=[
+                {"role": "system", "content": "Extract the planting information."},
+                {"role": "user", "content": soil_data_specification}
+            ],
+            response_format=Soil,
+        )
+        yield_response = client.beta.chat.completions.parse(
+            model=model_version,  # Use GPT model that supports structured output
+            messages=[
+                {"role": "system", "content": "Extract the planting information."},
+                {"role": "user", "content": yield_data_specification}
+            ],
+            response_format=Yield,
+        )
+        combined_json = field_response.choices[0].message.parsed.copy()
+        combined_json["plantings"] = plant_response.choices[0].message.parsed
+        combined_json["plantings"]["logs"] = log_response.choices[0].message.parsed
+        combined_json["plantings"]["soil"] = soil_response.choices[0].message.parsed
+        combined_json["plantings"]["yield"] = yield_response.choices[0].message.parsed
+        print(combined_json) # debugging
+        pretty_json = combined_json.json()
+        if 'error' in response:
+            raise ValueError(f"API error: {response['error']['message']}")
+        return pretty_json
+    except ValidationError as e:
+        return {"error": str(e)}
+    except Exception as e:
+        return {"error": "Failed to generate valid JSON. " + str(e)}
+def process_specifications(data, model_version, json_creation, additional_json_creation_options, field_data_input, planting_data_input, logs_data_input, soil_data_input, yield_data_input):
+    # This method just drives the process
+    # Uncomment when working on flippers
+    #if json_creation == "Single JSON Creation":
+    #    resulting_schema = generate_json(data, model_version)
+    #elif json_creation == "Step-wise JSON Creation":
+    #    resulting_schema = generate_json_pieces(data, model_version, additional_json_creation_options, field_data_input, planting_data_input, logs_data_input, soil_data_input, yield_data_input)
+    #return resulting_schema
+    global original_outputs, xml_outputs
+    output1, output2, output3 = generate_json(data, model_version)
+    return output1, output2, output3