Update script_for_automation.py
Browse files- script_for_automation.py +101 -14
script_for_automation.py
CHANGED
|
@@ -1,5 +1,8 @@
|
|
| 1 |
import requests
|
| 2 |
from jsondiff import diff
|
|
|
|
|
|
|
|
|
|
| 3 |
|
| 4 |
# The purpose of this script is to automate running a bunch of tests
|
| 5 |
# This script will take an input folder
|
|
@@ -255,12 +258,57 @@ def fill_out_survey(recipe_dict, input_data):
|
|
| 255 |
except requests.exceptions.RequestException as e:
|
| 256 |
print(f"An error occurred while submitting the data: {e}")
|
| 257 |
|
| 258 |
-
def get_data_ready(recipe_dict,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 259 |
|
| 260 |
|
| 261 |
def drive_process():
|
| 262 |
# this is to drive the processing process
|
|
|
|
|
|
|
| 263 |
gold_standards, input_data = get_baserow_data()
|
|
|
|
|
|
|
| 264 |
my_recipes = get_recipes()
|
| 265 |
|
| 266 |
# Input chunk structure
|
|
@@ -269,31 +317,70 @@ def drive_process():
|
|
| 269 |
# "otter_summary": liz_carrot_otter_summary_preprocessing,
|
| 270 |
# "greg_summary": liz_carrot_greg_summary_preprocessing
|
| 271 |
# },
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 272 |
|
| 273 |
for recipe_dict in my_recipes:
|
| 274 |
for key, input_chunks in input_data.items():
|
|
|
|
|
|
|
| 275 |
if recipe_dict["pre_processing_strategy"] == "Otter.ai Summary":
|
| 276 |
-
|
| 277 |
elif recipe_dict["pre_processing_strategy"] == "Greg Summary":
|
| 278 |
-
|
| 279 |
else:
|
| 280 |
-
|
| 281 |
-
|
|
|
|
| 282 |
fill_out_survey(recipe_dict, input_data)
|
| 283 |
-
|
|
|
|
|
|
|
| 284 |
completed_json = process_specifications(proc_spec)
|
| 285 |
|
| 286 |
|
| 287 |
-
#
|
| 288 |
-
|
| 289 |
-
|
|
|
|
| 290 |
|
| 291 |
# Convert to yaml
|
| 292 |
-
|
| 293 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 294 |
|
| 295 |
-
|
| 296 |
-
|
|
|
|
|
|
|
| 297 |
|
| 298 |
-
return
|
| 299 |
|
|
|
|
import json
import os

import pandas as pd
import requests
import yaml
from jsondiff import diff
| 6 |
|
| 7 |
# The purpose of this script is to automate running a bunch of tests
|
| 8 |
# This script will take an input folder
|
|
|
|
| 258 |
except requests.exceptions.RequestException as e:
|
| 259 |
print(f"An error occurred while submitting the data: {e}")
|
| 260 |
|
| 261 |
+
def get_data_ready(recipe_dict, input_data_piece):
|
| 262 |
+
## Input chunk structure
|
| 263 |
+
# "raw_interview": liz_carrot_input_data_raw_interview,
|
| 264 |
+
#
|
| 265 |
+
#
|
| 266 |
+
# recipe_dict = {
|
| 267 |
+
# "recipe_id": recipe_id,
|
| 268 |
+
# "testing_strategy_text": testing_strategy_text,
|
| 269 |
+
# "schema_processing_model", schema_processing_model,
|
| 270 |
+
# "pre_processing_strategy", pre_processing_strategy,
|
| 271 |
+
# "pre_processing_text", pre_processing_text,
|
| 272 |
+
# "pre_processing_model", pre_processing_model,
|
| 273 |
+
# "prompting_strategy", prompting_strategy,
|
| 274 |
+
# "plantings_and_fields_prompt", plantings_and_fields_prompt,
|
| 275 |
+
# "interactions_prompt", interactions_prompt,
|
| 276 |
+
# "treatments_prompt", treatments_prompt
|
| 277 |
+
# }
|
| 278 |
+
#
|
| 279 |
+
processed_data = {}
|
| 280 |
+
processed_data["input_style"] = 'big-block-input-text'
|
| 281 |
+
processed_data["input_text"] = input_data_piece
|
| 282 |
+
processed_data["prompts"]["firstschemaprompt"] = recipe_dict["plantings_and_fields_prompt"]
|
| 283 |
+
processed_data["prompts"]["secondschemaprompt"] = recipe_dict["interactions_prompt"]
|
| 284 |
+
processed_data["prompts"]["thirdschemaprompt"] = recipe_dict["treatments_prompt"]
|
| 285 |
+
|
| 286 |
+
processed_data["parameters"] = {}
|
| 287 |
+
processed_data["parameters"]["modelversion"] = recipe_dict["schema_processing_model"]
|
| 288 |
+
processed_data["parameters"]["promptstyle"] = recipe_dict["prompting_strategy"]
|
| 289 |
+
if recipe_dict["pre_processing_text"] is None and recipe_dict["pre_processing_strategy"] is None:
|
| 290 |
+
processed_data["parameters"]["preprocessdata"] = "no"
|
| 291 |
+
else:
|
| 292 |
+
processed_data["parameters"]["preprocessdata"] = "yes"
|
| 293 |
+
processed_data["parameters"]["preprocessmodelversion"] = recipe_dict["pre_processing_model"]
|
| 294 |
+
processed_data["parameters"]["multiplepreprompts"] = "no"
|
| 295 |
+
processed_data["parameters"]["prepromptstyle"] = recipe_dict["pre_processing_strategy"]
|
| 296 |
+
processed_data["parameters"]["preprocessingprompt1"] = recipe_dict["pre_processing_text"]
|
| 297 |
+
processed_data["parameters"]["preprocessingprompt2"] = ""
|
| 298 |
+
processed_data["parameters"]["preprocessingprompt3"] = ""
|
| 299 |
+
|
| 300 |
+
return processed_data
|
| 301 |
+
|
| 302 |
+
|
| 303 |
|
| 304 |
|
| 305 |
def drive_process():
|
| 306 |
# this is to drive the processing process
|
| 307 |
+
|
| 308 |
+
# Get the data from baserow (gold standards JSON and Input data)
|
| 309 |
gold_standards, input_data = get_baserow_data()
|
| 310 |
+
|
| 311 |
+
# Get the recipes from baserow too
|
| 312 |
my_recipes = get_recipes()
|
| 313 |
|
| 314 |
# Input chunk structure
|
|
|
|
| 317 |
# "otter_summary": liz_carrot_otter_summary_preprocessing,
|
| 318 |
# "greg_summary": liz_carrot_greg_summary_preprocessing
|
| 319 |
# },
|
| 320 |
+
|
| 321 |
+
output_rows = []
|
| 322 |
+
output_folder = "output_files"
|
| 323 |
+
if not os.path.exists(output_folder)
|
| 324 |
+
os.makedirs(output_folder)
|
| 325 |
|
| 326 |
for recipe_dict in my_recipes:
|
| 327 |
for key, input_chunks in input_data.items():
|
| 328 |
+
|
| 329 |
+
# Get the input data based on the recipe
|
| 330 |
if recipe_dict["pre_processing_strategy"] == "Otter.ai Summary":
|
| 331 |
+
input_data_piece = input_chunks["otter_summary"]
|
| 332 |
elif recipe_dict["pre_processing_strategy"] == "Greg Summary":
|
| 333 |
+
input_data_piece = input_chunks["greg_summary"]
|
| 334 |
else:
|
| 335 |
+
input_data_piece = input_chunks["raw_interview"]
|
| 336 |
+
|
| 337 |
+
# Fill out a Surveystack submission
|
| 338 |
fill_out_survey(recipe_dict, input_data)
|
| 339 |
+
|
| 340 |
+
# Prepare the data for the structured output setup
|
| 341 |
+
proc_spec = get_data_ready(recipe_dict, input_data_piece)
|
| 342 |
completed_json = process_specifications(proc_spec)
|
| 343 |
|
| 344 |
|
| 345 |
+
# Get the gold standard for this input_chunk (liz_carrot, ben_soybean, wally_squash)
|
| 346 |
+
# Compare the generated JSON to the gold standard
|
| 347 |
+
gold_standard_json = gold_standard[key]
|
| 348 |
+
differences = list(diff(gold_standard_json, completed_json))
|
| 349 |
|
| 350 |
# Convert to yaml
|
| 351 |
+
gold_standard_yaml = yaml.dump(gold_standard_json, default_flow_style=False)
|
| 352 |
+
comparison_yaml = yaml.dump(completed_json, default_flow_style=False)
|
| 353 |
+
|
| 354 |
+
recipe_id = recipe_dict.get("recipe_id", "N/A")
|
| 355 |
+
output_rows.append({
|
| 356 |
+
"Recipe_ID": recipe_id,
|
| 357 |
+
"Testing_Strategy_Text": recipe_dict.get("testing_strategy_text", "N/A"),
|
| 358 |
+
"Schema_Processing_Model": recipe_dict.get("schema_processing_model", "N/A"),
|
| 359 |
+
"Pre_Processing_Strategy": recipe_dict.get("pre_processing_strategy", "N/A"),
|
| 360 |
+
"Pre_Processing_Text": recipe_dict.get("pre_processing_text", "N/A"),
|
| 361 |
+
"Pre_Processing_Model": recipe_dict.get("pre_processing_model", "N/A"),
|
| 362 |
+
"Prompting_Strategy": recipe_dict.get("prompting_strategy", "N/A"),
|
| 363 |
+
"Plantings_and_Fields_Prompt": recipe_dict.get("plantings_and_fields_prompt", "N/A"),
|
| 364 |
+
"Interactions_Prompt": recipe_dict.get("interactions_prompt", "N/A"),
|
| 365 |
+
"Treatments_Prompt": recipe_dict.get("treatments_prompt", "N/A"),
|
| 366 |
+
"Input_Transcript": input_data,
|
| 367 |
+
"Gold_Standard_Key_Values": json.dumps(gold_standard_json, indent=2),
|
| 368 |
+
"Machine_Generated_Key_Values": json.dumps(completed_json, indent=2),
|
| 369 |
+
"Differences": json.dumps(differences, indent=2),
|
| 370 |
+
"Gold_Standard_YAML": gold_standard_yaml,
|
| 371 |
+
"Machine_Generated_YAML": comparison_yaml
|
| 372 |
+
})
|
| 373 |
+
|
| 374 |
+
df = pd.DataFrame(output_rows)
|
| 375 |
+
|
| 376 |
+
markdown_output = generate_markdown_output(df)
|
| 377 |
+
output_file = f"/output/recipe_run_{recipe_id}_{key}.md"
|
| 378 |
+
output_file_path = os.path.join(output_folder, output_file_name)
|
| 379 |
|
| 380 |
+
with open(output_file_path, "w") as file:
|
| 381 |
+
file.write(markdown_output)
|
| 382 |
+
|
| 383 |
+
print(f"Markdown file saved at {output_file_path}")
|
| 384 |
|
| 385 |
+
return output_folder
|
| 386 |
|