rosemariafontana committed on
Commit
1618d8d
·
verified ·
1 Parent(s): ef5da06

Update script_for_automation.py

Browse files
Files changed (1) hide show
  1. script_for_automation.py +43 -4
script_for_automation.py CHANGED
@@ -32,12 +32,15 @@ BASEROW_API_KEY = os.getenv("BASEROW_API_KEY")
32
  from process_data import process_specifications
33
 
34
  def get_baserow_url(table_id):
 
35
  BASEROW_API_BASE = "https://baserow.f11804a1.federatedcomputer.net/api"
36
  return f"{BASEROW_API_BASE}/database/rows/table/{table_id}/?user_field_names=true"
37
 
38
  def get_baserow_data():
39
  # This is to get the gold standards from baserow
40
  # We will also get the input data
 
 
41
 
42
  TABLE_ID = "560"
43
 
@@ -47,13 +50,17 @@ def get_baserow_data():
47
  "Authorization": f"Token {os.environ['BASEROW_API_KEY']}",
48
  "Content-Type": "application/json"
49
  }
50
-
 
51
  try:
52
  response = requests.get(BASEROW_URL, headers=headers)
 
53
  response.raise_for_status()
54
  rows = response.json()
55
  results = rows.get("results", [])
56
 
 
 
57
  for row in results:
58
  print(f"Row ID: {row.get('id')}, Data: {row}")
59
 
@@ -123,12 +130,19 @@ def get_baserow_data():
123
  }
124
  }
125
 
 
 
 
 
 
126
  return gold_standards, input_data
127
 
128
  except requests.exceptions.RequestException as e:
129
  print(f"Failed to fetch rows: {e}")
130
 
131
  def get_recipes():
 
 
132
  TABLE_ID = "578"
133
 
134
  BASEROW_URL = get_baserow_url(TABLE_ID)
@@ -137,7 +151,8 @@ def get_recipes():
137
  "Authorization": f"Token {os.environ['BASEROW_API_KEY']}",
138
  "Content-Type": "application/json"
139
  }
140
-
 
141
  try:
142
  response = requests.get(BASEROW_URL, headers=headers)
143
  response.raise_for_status()
@@ -145,6 +160,7 @@ def get_recipes():
145
  results = rows.get("results", [])
146
 
147
  my_recipes = []
 
148
  for row in results:
149
  print(f"Row ID: {row.get('id')}, Data: {row}")
150
  recipe_id = row.get("Recipe ID")
@@ -173,12 +189,16 @@ def get_recipes():
173
 
174
  my_recipes.append(recipe_dict)
175
 
 
 
 
176
  return my_recipes
177
 
178
  except requests.exceptions.RequestException as e:
179
  print(f"Failed to fetch rows: {e}")
180
 
181
  def fill_out_survey(recipe_dict, input_data):
 
182
  survey_id = "673b4994aef86f0533b3546c"
183
 
184
  base_url = "https://app.surveystack.io/api/submissions"
@@ -248,6 +268,7 @@ def fill_out_survey(recipe_dict, input_data):
248
  "Content-Type": "application/json",
249
  }
250
 
 
251
  try:
252
  response = requests.post(base_url, headers=headers, data=json.dumps(submission_data))
253
  response.raise_for_status()
@@ -279,6 +300,7 @@ def get_data_ready(recipe_dict, input_data_piece):
279
  # "treatments_prompt", treatments_prompt
280
  # }
281
  #
 
282
  processed_data = {}
283
  processed_data["input_style"] = 'big-block-input-text'
284
  processed_data["input_text"] = input_data_piece
@@ -300,6 +322,7 @@ def get_data_ready(recipe_dict, input_data_piece):
300
  processed_data["parameters"]["preprocessingprompt2"] = ""
301
  processed_data["parameters"]["preprocessingprompt3"] = ""
302
 
 
303
  return processed_data
304
 
305
  def generate_markdown_output(df):
@@ -370,7 +393,8 @@ def generate_markdown_output(df):
370
 
371
  def drive_process():
372
  # this is to drive the processing process
373
-
 
374
  # Get the data from baserow (gold standards JSON and Input data)
375
  gold_standards, input_data = get_baserow_data()
376
 
@@ -384,12 +408,17 @@ def drive_process():
384
  # "greg_summary": liz_carrot_greg_summary_preprocessing
385
  # },
386
 
 
387
  output_rows = []
388
  output_folder = "output_results_" +datetime.now().strftime("%Y%m%d_%H%M%S")
389
  os.makedirs(output_folder, exist_ok=True)
390
-
 
391
  for recipe_dict in my_recipes:
392
  for key, input_chunks in input_data.items():
 
 
 
393
 
394
  # Get the input data based on the recipe
395
  if recipe_dict["pre_processing_strategy"] == "Otter.ai Summary":
@@ -399,19 +428,26 @@ def drive_process():
399
  else:
400
  input_data_piece = input_chunks["raw_interview"]
401
 
 
 
 
402
  # Fill out a Surveystack submission
403
  fill_out_survey(recipe_dict, input_data)
404
 
405
  # Prepare the data for the structured output setup
406
  proc_spec = get_data_ready(recipe_dict, input_data_piece)
 
 
407
  completed_json = process_specifications(proc_spec)
408
 
409
 
 
410
  # Get the gold standard for this input_chunk (liz_carrot, ben_soybean, wally_squash)
411
  # Compare the generated JSON to the gold standard
412
  gold_standard_json = gold_standards[key]
413
  differences = list(diff(gold_standard_json, completed_json))
414
 
 
415
  # Convert to yaml
416
  gold_standard_yaml = yaml.dump(gold_standard_json, default_flow_style=False)
417
  comparison_yaml = yaml.dump(completed_json, default_flow_style=False)
@@ -438,6 +474,8 @@ def drive_process():
438
 
439
  df = pd.DataFrame(output_rows)
440
 
 
 
441
  markdown_output = generate_markdown_output(df)
442
  recipe_folder = os.path.join(output_folder, f"recipe_{recipe_dict['recipe_id']}")
443
  os.makedirs(recipe_folder, exist_ok=True)
@@ -460,6 +498,7 @@ def drive_process():
460
  with open(differences_file, 'w') as f:
461
  json.dump(differences, f, indent=2)
462
 
 
463
  # Zip the entire output folder
464
  zip_filename = f"{output_folder}.zip"
465
  shutil.make_archive(output_folder, 'zip', output_folder)
 
32
  from process_data import process_specifications
33
 
34
  def get_baserow_url(table_id):
35
+ print("GETTING BASEROW URL")
36
  BASEROW_API_BASE = "https://baserow.f11804a1.federatedcomputer.net/api"
37
  return f"{BASEROW_API_BASE}/database/rows/table/{table_id}/?user_field_names=true"
38
 
39
  def get_baserow_data():
40
  # This is to get the gold standards from baserow
41
  # We will also get the input data
42
+
43
+ print("GETTING BASEROW DATA")
44
 
45
  TABLE_ID = "560"
46
 
 
50
  "Authorization": f"Token {os.environ['BASEROW_API_KEY']}",
51
  "Content-Type": "application/json"
52
  }
53
+
54
+ print("STARTING TO TRY RESPONSE REQUEST")
55
  try:
56
  response = requests.get(BASEROW_URL, headers=headers)
57
+ print("GOT")
58
  response.raise_for_status()
59
  rows = response.json()
60
  results = rows.get("results", [])
61
 
62
+ print("PARSING ROWS NOW")
63
+
64
  for row in results:
65
  print(f"Row ID: {row.get('id')}, Data: {row}")
66
 
 
130
  }
131
  }
132
 
133
+ print("BASEROW DATA DONE GOT")
134
+ print("GOLD STANDARDS HERE")
135
+ print(gold_standards)
136
+ print("INPUT DATA HERE")
137
+ print(input_data)
138
  return gold_standards, input_data
139
 
140
  except requests.exceptions.RequestException as e:
141
  print(f"Failed to fetch rows: {e}")
142
 
143
  def get_recipes():
144
+ print("GETTING RECIPES FROM BASEROW NOW")
145
+
146
  TABLE_ID = "578"
147
 
148
  BASEROW_URL = get_baserow_url(TABLE_ID)
 
151
  "Authorization": f"Token {os.environ['BASEROW_API_KEY']}",
152
  "Content-Type": "application/json"
153
  }
154
+
155
+ print("TRYING TO GET A RESPONSE")
156
  try:
157
  response = requests.get(BASEROW_URL, headers=headers)
158
  response.raise_for_status()
 
160
  results = rows.get("results", [])
161
 
162
  my_recipes = []
163
+ print("PARSING ROWS")
164
  for row in results:
165
  print(f"Row ID: {row.get('id')}, Data: {row}")
166
  recipe_id = row.get("Recipe ID")
 
189
 
190
  my_recipes.append(recipe_dict)
191
 
192
+ print("FINISHED GETTING THE RECIPE DATA")
193
+ print("RECIPES HERE")
194
+ print(my_recipes)
195
  return my_recipes
196
 
197
  except requests.exceptions.RequestException as e:
198
  print(f"Failed to fetch rows: {e}")
199
 
200
  def fill_out_survey(recipe_dict, input_data):
201
+ print("filling out survey")
202
  survey_id = "673b4994aef86f0533b3546c"
203
 
204
  base_url = "https://app.surveystack.io/api/submissions"
 
268
  "Content-Type": "application/json",
269
  }
270
 
271
+ print("GETTING SURVEY RESPONSE")
272
  try:
273
  response = requests.post(base_url, headers=headers, data=json.dumps(submission_data))
274
  response.raise_for_status()
 
300
  # "treatments_prompt", treatments_prompt
301
  # }
302
  #
303
+ print("GETTING DATA READY")
304
  processed_data = {}
305
  processed_data["input_style"] = 'big-block-input-text'
306
  processed_data["input_text"] = input_data_piece
 
322
  processed_data["parameters"]["preprocessingprompt2"] = ""
323
  processed_data["parameters"]["preprocessingprompt3"] = ""
324
 
325
+ print("DID THAT NOW")
326
  return processed_data
327
 
328
  def generate_markdown_output(df):
 
393
 
394
  def drive_process():
395
  # this is to drive the processing process
396
+ print("We are starting to DRIVE PROCESS")
397
+
398
  # Get the data from baserow (gold standards JSON and Input data)
399
  gold_standards, input_data = get_baserow_data()
400
 
 
408
  # "greg_summary": liz_carrot_greg_summary_preprocessing
409
  # },
410
 
411
+ print("Making the OUTPUT STUFF")
412
  output_rows = []
413
  output_folder = "output_results_" +datetime.now().strftime("%Y%m%d_%H%M%S")
414
  os.makedirs(output_folder, exist_ok=True)
415
+
416
+ print("GOING THROUGH RECIPES NOW")
417
  for recipe_dict in my_recipes:
418
  for key, input_chunks in input_data.items():
419
+ print("RECIPE INFO")
420
+ print(key)
421
+ print(recipe_dict["recipe_id"])
422
 
423
  # Get the input data based on the recipe
424
  if recipe_dict["pre_processing_strategy"] == "Otter.ai Summary":
 
428
  else:
429
  input_data_piece = input_chunks["raw_interview"]
430
 
431
+ print("DECIDED INPUT DATA")
432
+ print(input_data_piece)
433
+
434
  # Fill out a Surveystack submission
435
  fill_out_survey(recipe_dict, input_data)
436
 
437
  # Prepare the data for the structured output setup
438
  proc_spec = get_data_ready(recipe_dict, input_data_piece)
439
+
440
+ print("PROCESSING SPECIFICATIONS!!!!!!!!!!!!!!!")
441
  completed_json = process_specifications(proc_spec)
442
 
443
 
444
+ print("Gold Standard diff and stuff")
445
  # Get the gold standard for this input_chunk (liz_carrot, ben_soybean, wally_squash)
446
  # Compare the generated JSON to the gold standard
447
  gold_standard_json = gold_standards[key]
448
  differences = list(diff(gold_standard_json, completed_json))
449
 
450
+ print("yaml world")
451
  # Convert to yaml
452
  gold_standard_yaml = yaml.dump(gold_standard_json, default_flow_style=False)
453
  comparison_yaml = yaml.dump(completed_json, default_flow_style=False)
 
474
 
475
  df = pd.DataFrame(output_rows)
476
 
477
+ print("dataframe done now onto markdown")
478
+
479
  markdown_output = generate_markdown_output(df)
480
  recipe_folder = os.path.join(output_folder, f"recipe_{recipe_dict['recipe_id']}")
481
  os.makedirs(recipe_folder, exist_ok=True)
 
498
  with open(differences_file, 'w') as f:
499
  json.dump(differences, f, indent=2)
500
 
501
+ print("ZIPPING UP WHOLE THING")
502
  # Zip the entire output folder
503
  zip_filename = f"{output_folder}.zip"
504
  shutil.make_archive(output_folder, 'zip', output_folder)