rosemariafontana committed on
Commit
080b93d
·
verified ·
1 Parent(s): f10f4aa

Update script_for_automation.py

Browse files
Files changed (1) hide show
  1. script_for_automation.py +101 -14
script_for_automation.py CHANGED
@@ -1,5 +1,8 @@
1
  import requests
2
  from jsondiff import diff
 
 
 
3
 
4
  # The purpose of this script is to automate running a bunch of tests
5
  # This script will take an input folder
@@ -255,12 +258,57 @@ def fill_out_survey(recipe_dict, input_data):
255
  except requests.exceptions.RequestException as e:
256
  print(f"An error occurred while submitting the data: {e}")
257
 
258
- def get_data_ready(recipe_dict, input_data):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
259
 
260
 
261
  def drive_process():
262
  # this is to drive the processing process
 
 
263
  gold_standards, input_data = get_baserow_data()
 
 
264
  my_recipes = get_recipes()
265
 
266
  # Input chunk structure
@@ -269,31 +317,70 @@ def drive_process():
269
  # "otter_summary": liz_carrot_otter_summary_preprocessing,
270
  # "greg_summary": liz_carrot_greg_summary_preprocessing
271
  # },
 
 
 
 
 
272
 
273
  for recipe_dict in my_recipes:
274
  for key, input_chunks in input_data.items():
 
 
275
  if recipe_dict["pre_processing_strategy"] == "Otter.ai Summary":
276
- input_data = input_chunks["otter_summary"]
277
  elif recipe_dict["pre_processing_strategy"] == "Greg Summary":
278
- input_data = input_chunks["greg_summary"]
279
  else:
280
- input_data = input_chunks["raw_interview"]
281
-
 
282
  fill_out_survey(recipe_dict, input_data)
283
- proc_spec = get_data_ready(recipe_dict, input_data)
 
 
284
  completed_json = process_specifications(proc_spec)
285
 
286
 
287
- # This is for after doing the stuff with chatgpt actually
288
- gold_standard_dict = gold_standard[key]
289
- difference = diff(gold_standard_dict, completed_json)
 
290
 
291
  # Convert to yaml
292
- # Convert BOTH to yaml
293
- # Side by side
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
294
 
295
- # build file
296
-
 
 
297
 
298
- return "banana"
299
 
 
1
  import requests
2
  from jsondiff import diff
3
+ import yaml
4
+ import pandas as pd
5
+ import os
6
 
7
  # The purpose of this script is to automate running a bunch of tests
8
  # This script will take an input folder
 
258
  except requests.exceptions.RequestException as e:
259
  print(f"An error occurred while submitting the data: {e}")
260
 
261
def get_data_ready(recipe_dict, input_data_piece):
    """Build the processing specification for one recipe and one input text.

    Args:
        recipe_dict: Recipe settings. The keys read here are
            ``plantings_and_fields_prompt``, ``interactions_prompt``,
            ``treatments_prompt``, ``schema_processing_model``,
            ``prompting_strategy``, ``pre_processing_text``,
            ``pre_processing_strategy`` and, only when pre-processing is
            enabled, ``pre_processing_model``.
        input_data_piece: The input text for this run; stored unchanged
            under ``"input_text"``.

    Returns:
        A new dict with the entries ``"input_style"``, ``"input_text"``,
        ``"prompts"`` and ``"parameters"``.

    Raises:
        KeyError: If ``recipe_dict`` lacks a key that is read.
    """
    pre_processing_text = recipe_dict["pre_processing_text"]
    pre_processing_strategy = recipe_dict["pre_processing_strategy"]

    # The nested "prompts" dict has to exist before it is filled in;
    # assigning into processed_data["prompts"][...] on an empty dict raised
    # KeyError on every call.
    prompts = {
        "firstschemaprompt": recipe_dict["plantings_and_fields_prompt"],
        "secondschemaprompt": recipe_dict["interactions_prompt"],
        "thirdschemaprompt": recipe_dict["treatments_prompt"],
    }

    parameters = {
        "modelversion": recipe_dict["schema_processing_model"],
        "promptstyle": recipe_dict["prompting_strategy"],
    }

    # Pre-processing counts as disabled only when BOTH the text and the
    # strategy are None; the pre-processing settings are added only when
    # it is enabled.
    if pre_processing_text is None and pre_processing_strategy is None:
        parameters["preprocessdata"] = "no"
    else:
        parameters["preprocessdata"] = "yes"
        parameters["preprocessmodelversion"] = recipe_dict["pre_processing_model"]
        parameters["multiplepreprompts"] = "no"
        parameters["prepromptstyle"] = pre_processing_strategy
        parameters["preprocessingprompt1"] = pre_processing_text
        # Only a single pre-processing prompt is supplied; the remaining
        # slots are sent as empty strings.
        parameters["preprocessingprompt2"] = ""
        parameters["preprocessingprompt3"] = ""

    return {
        "input_style": "big-block-input-text",
        "input_text": input_data_piece,
        "prompts": prompts,
        "parameters": parameters,
    }
301
+
302
+
303
 
304
 
305
def drive_process():
    """Run every recipe against every input and write a markdown report per run.

    For each (recipe, input) pair this submits a survey, builds the
    processing specification, runs it, compares the generated JSON with the
    gold standard for that input, and writes a markdown file into
    ``output_files``.

    ``input_data`` is iterated as a mapping from an input key to a dict of
    text variants; the variants read here are ``"raw_interview"``,
    ``"otter_summary"`` and ``"greg_summary"``. ``gold_standards`` is
    indexed with the same input key.

    Returns:
        The name of the folder the markdown files were written to.
    """
    # Function-scope import: json is needed below but is not among the
    # imports at the top of the file.
    import json

    # Get the data from baserow (gold standards JSON and input data)
    gold_standards, input_data = get_baserow_data()

    # Get the recipes from baserow too
    my_recipes = get_recipes()

    output_rows = []
    output_folder = "output_files"
    os.makedirs(output_folder, exist_ok=True)

    for recipe_dict in my_recipes:
        strategy = recipe_dict["pre_processing_strategy"]

        for key, input_chunks in input_data.items():
            # Pick the text variant that matches the recipe's
            # pre-processing strategy; anything else uses the raw interview.
            if strategy == "Otter.ai Summary":
                input_data_piece = input_chunks["otter_summary"]
            elif strategy == "Greg Summary":
                input_data_piece = input_chunks["greg_summary"]
            else:
                input_data_piece = input_chunks["raw_interview"]

            # Fill out a Surveystack submission with the selected text
            # (not the whole input_data mapping).
            fill_out_survey(recipe_dict, input_data_piece)

            # Prepare the data for the structured output setup
            proc_spec = get_data_ready(recipe_dict, input_data_piece)
            completed_json = process_specifications(proc_spec)

            # Compare the generated JSON to the gold standard for this input
            gold_standard_json = gold_standards[key]
            differences = list(diff(gold_standard_json, completed_json))

            # Convert both sides to YAML
            gold_standard_yaml = yaml.dump(
                gold_standard_json, default_flow_style=False
            )
            comparison_yaml = yaml.dump(completed_json, default_flow_style=False)

            recipe_id = recipe_dict.get("recipe_id", "N/A")
            output_rows.append({
                "Recipe_ID": recipe_id,
                "Testing_Strategy_Text": recipe_dict.get("testing_strategy_text", "N/A"),
                "Schema_Processing_Model": recipe_dict.get("schema_processing_model", "N/A"),
                "Pre_Processing_Strategy": recipe_dict.get("pre_processing_strategy", "N/A"),
                "Pre_Processing_Text": recipe_dict.get("pre_processing_text", "N/A"),
                "Pre_Processing_Model": recipe_dict.get("pre_processing_model", "N/A"),
                "Prompting_Strategy": recipe_dict.get("prompting_strategy", "N/A"),
                "Plantings_and_Fields_Prompt": recipe_dict.get("plantings_and_fields_prompt", "N/A"),
                "Interactions_Prompt": recipe_dict.get("interactions_prompt", "N/A"),
                "Treatments_Prompt": recipe_dict.get("treatments_prompt", "N/A"),
                "Input_Transcript": input_data_piece,
                "Gold_Standard_Key_Values": json.dumps(gold_standard_json, indent=2),
                "Machine_Generated_Key_Values": json.dumps(completed_json, indent=2),
                # NOTE(review): jsondiff can use non-string marker objects as
                # keys (e.g. for deletions); default=str keeps the dump from
                # raising on them. Confirm this is the desired rendering.
                "Differences": json.dumps(differences, indent=2, default=str),
                "Gold_Standard_YAML": gold_standard_yaml,
                "Machine_Generated_YAML": comparison_yaml,
            })

            # NOTE(review): output_rows accumulates across runs, so each file
            # also contains the rows of all earlier runs. Confirm whether
            # each file should hold only the current run.
            df = pd.DataFrame(output_rows)

            markdown_output = generate_markdown_output(df)

            # The file name must be relative: a leading "/" would make
            # os.path.join drop output_folder entirely.
            output_file_name = f"recipe_run_{recipe_id}_{key}.md"
            output_file_path = os.path.join(output_folder, output_file_name)

            with open(output_file_path, "w", encoding="utf-8") as file:
                file.write(markdown_output)

            print(f"Markdown file saved at {output_file_path}")

    return output_folder
386