rosemariafontana committed on
Commit
e34408d
·
verified ·
1 Parent(s): fa532ba

Create process_data.py

Browse files
Files changed (1) hide show
  1. process_data.py +186 -0
process_data.py ADDED
@@ -0,0 +1,186 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from pydantic import BaseModel, Field, validator, ValidationError
3
+ import gradio as gr
4
+ from openai import OpenAI
5
+ from typing import List, Dict, Any, Optional, Literal, Union
6
+ from enum import Enum
7
+ from gradio_toggle import Toggle
8
+ from dicttoxml import dicttoxml
9
+ import json
10
+
11
# Chatbot model
# Fail fast with a readable message when the key is missing: writing the
# result of os.getenv() straight back into os.environ raises an opaque
# TypeError if the variable is unset, because environ values must be str.
_openai_api_key = os.getenv("OPENAI_API_KEY")
if _openai_api_key is None:
    raise RuntimeError("OPENAI_API_KEY environment variable is not set.")
os.environ["OPENAI_API_KEY"] = _openai_api_key
client = OpenAI()
15
+
16
# Single-pass JSON creation: one structured-output request per schema.
def _parse_structured(system_prompt, user_content, model_version, response_format):
    """Send one structured-output request and return the parsed model.

    Args:
        system_prompt: Instruction placed in the system message.
        user_content: Text placed in the user message.
        model_version: Name of the model to query.
        response_format: Schema class passed as ``response_format``.

    Returns:
        The ``parsed`` object of the first choice in the response.

    Raises:
        ValueError: If the first choice carries no parsed payload.
    """
    response = client.beta.chat.completions.parse(
        model=model_version,  # Use GPT model that supports structured output
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_content},
        ],
        response_format=response_format,
    )
    parsed = response.choices[0].message.parsed
    if parsed is None:
        # Report the missing payload here instead of letting a later
        # attribute access on None produce a confusing error.
        raise ValueError(f"API error: no parsed output for prompt {system_prompt!r}")
    return parsed


def generate_json(input_data, model_version):
    """
    Function to prompt OpenAI API to generate structured JSON output.

    Three requests are made with the same input text, one each for the
    FarmActivities, Interactions and Trial schemas.

    Args:
        input_data: Free-form text to extract information from.
        model_version: Name of the model to query.

    Returns:
        On success, a 3-tuple of JSON strings
        ``(farm, interactions, trial)``. On failure, a single dict of the
        form ``{"error": message}``.
    """
    try:
        farm_generated_json = _parse_structured(
            "Extract the farm information.", input_data, model_version, FarmActivities
        )
        print("FARM JSON: ")
        print(farm_generated_json)  # debugging
        farm_pretty_json = farm_generated_json.json()

        # The interactions and trial requests use the same input text as the
        # farm request (the previous code referenced an undefined name here).
        interactions_generated_json = _parse_structured(
            "Extract the interactions information.", input_data, model_version, Interactions
        )
        print("INTERACTIONS JSON: ")
        print(interactions_generated_json)  # debugging 2
        interactions_pretty_json = interactions_generated_json.json()

        trial_generated_json = _parse_structured(
            "Extract the trial information.", input_data, model_version, Trial
        )
        print("TRIALS JSON: ")
        print(trial_generated_json)  # debugging 3
        trial_pretty_json = trial_generated_json.json()

        return farm_pretty_json, interactions_pretty_json, trial_pretty_json

    except ValidationError as e:
        return {"error": str(e)}
    except Exception as e:
        # Boundary handler: any failure is surfaced to the caller as an
        # error payload rather than an exception.
        return {"error": "Failed to generate valid JSON. " + str(e)}
87
+
88
# This is for the step-wise JSON creation
def generate_json_pieces(
    specification,
    model_version,
    additional_json_creation_options,
    field_data_input,
    planting_data_input,
    logs_data_input,
    soil_data_input,
    yield_data_input,
):
    """Build one combined JSON document from five separate extractions.

    Field, planting, log, soil and yield information are each requested
    separately, then the log/soil/yield pieces are nested under the
    planting piece, which is nested under the field piece as ``"plantings"``.

    Args:
        specification: Single input text, used for every piece when the
            option is "Parse from one big input text".
        model_version: Name of the model to query.
        additional_json_creation_options: Either "Explicit specific pieces"
            (use the five ``*_input`` arguments) or
            "Parse from one big input text" (use ``specification``).
        field_data_input: Text for the field piece.
        planting_data_input: Text for the planting piece.
        logs_data_input: Text for the logs piece.
        soil_data_input: Text for the soil piece.
        yield_data_input: Text for the yield piece.

    Returns:
        The combined document as a JSON string, or ``{"error": message}``
        on failure (including an unrecognised option).
    """

    def extract(system_prompt, user_content, response_format):
        # One structured-output request, returned as a plain dict.
        response = client.beta.chat.completions.parse(
            model=model_version,  # Use GPT model that supports structured output
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_content},
            ],
            response_format=response_format,
        )
        parsed = response.choices[0].message.parsed
        if parsed is None:
            raise ValueError(f"API error: no parsed output for prompt {system_prompt!r}")
        # Round-trip through JSON so the pieces are plain, JSON-safe values
        # that support item assignment (the parsed model objects do not).
        # NOTE(review): assumes each schema serialises to a JSON object.
        return json.loads(parsed.json())

    try:
        if additional_json_creation_options == "Explicit specific pieces":
            field_data_specification = field_data_input
            planting_data_specification = planting_data_input
            logs_data_specification = logs_data_input
            soil_data_specification = soil_data_input
            yield_data_specification = yield_data_input
        elif additional_json_creation_options == "Parse from one big input text":
            field_data_specification = specification
            planting_data_specification = specification
            logs_data_specification = specification
            soil_data_specification = specification
            yield_data_specification = specification
        else:
            # Previously this fell through and failed later on unbound names.
            raise ValueError(
                f"Unknown JSON creation option: {additional_json_creation_options!r}"
            )

        combined_json = extract(
            "Extract the field information.", field_data_specification, FarmActivitiesLite
        )
        plantings = extract(
            "Extract the planting information.", planting_data_specification, PlantingLite
        )
        # Each piece gets its own prompt; the log/soil/yield requests used to
        # reuse the planting prompt verbatim.
        plantings["logs"] = extract(
            "Extract the logs information.", logs_data_specification, Log
        )
        plantings["soil"] = extract(
            "Extract the soil information.", soil_data_specification, Soil
        )
        plantings["yield"] = extract(
            "Extract the yield information.", yield_data_specification, Yield
        )
        combined_json["plantings"] = plantings

        print(combined_json)  # debugging

        return json.dumps(combined_json)

    except ValidationError as e:
        return {"error": str(e)}
    except Exception as e:
        # Boundary handler: any failure is surfaced to the caller as an
        # error payload rather than an exception.
        return {"error": "Failed to generate valid JSON. " + str(e)}
171
+
172
def process_specifications(
    data,
    model_version,
    json_creation,
    additional_json_creation_options,
    field_data_input,
    planting_data_input,
    logs_data_input,
    soil_data_input,
    yield_data_input,
):
    """Drive the JSON generation process and return three outputs.

    Only the single-pass path (``generate_json``) is wired up at present;
    ``json_creation`` and the step-wise arguments are accepted but unused.

    Args:
        data: Input text passed to ``generate_json``.
        model_version: Name of the model to query.
        json_creation: Creation mode selector (currently unused).
        additional_json_creation_options: Step-wise option (currently unused).
        field_data_input: Step-wise field text (currently unused).
        planting_data_input: Step-wise planting text (currently unused).
        logs_data_input: Step-wise logs text (currently unused).
        soil_data_input: Step-wise soil text (currently unused).
        yield_data_input: Step-wise yield text (currently unused).

    Returns:
        A 3-tuple of JSON strings. When ``generate_json`` reports a failure,
        all three entries are the same JSON-encoded error payload.
    """
    # Uncomment when working on flippers
    # if json_creation == "Single JSON Creation":
    #     resulting_schema = generate_json(data, model_version)
    # elif json_creation == "Step-wise JSON Creation":
    #     resulting_schema = generate_json_pieces(data, model_version, additional_json_creation_options, field_data_input, planting_data_input, logs_data_input, soil_data_input, yield_data_input)
    # return resulting_schema

    result = generate_json(data, model_version)

    if isinstance(result, dict):
        # generate_json returns a single {"error": ...} dict on failure;
        # unpacking that into three names would raise ValueError, so fan the
        # error out to every output slot instead.
        error_json = json.dumps(result)
        return error_json, error_json, error_json

    output1, output2, output3 = result
    return output1, output2, output3