File size: 26,957 Bytes
bfefbe8
632b9a9
080b93d
 
 
75acc99
 
 
bfefbe8
 
 
 
 
 
 
 
 
 
 
 
5802390
 
bfefbe8
 
 
 
 
 
 
 
 
8f21503
bfefbe8
632b9a9
1618d8d
bfefbe8
632b9a9
 
 
 
 
1618d8d
 
632b9a9
bfefbe8
632b9a9
 
bfefbe8
 
b7e10a1
bfefbe8
 
1618d8d
 
bfefbe8
632b9a9
1618d8d
bfefbe8
 
 
 
1618d8d
 
bfefbe8
 
 
 
 
 
 
632b9a9
 
 
 
bfefbe8
 
 
 
632b9a9
 
 
 
bfefbe8
 
 
 
632b9a9
 
 
 
 
 
5802390
 
 
 
632b9a9
5802390
 
 
 
632b9a9
5802390
 
 
 
632b9a9
 
bfefbe8
632b9a9
04df73d
bfefbe8
632b9a9
5802390
 
 
 
632b9a9
5802390
 
 
 
632b9a9
5802390
 
 
 
632b9a9
 
5802390
1618d8d
 
 
 
 
632b9a9
bfefbe8
 
 
 
632b9a9
1618d8d
cd50c8a
0595358
cd50c8a
1c29207
4c1eaa8
632b9a9
 
 
 
2e7e5db
632b9a9
 
1618d8d
 
632b9a9
 
 
 
 
 
 
1618d8d
632b9a9
 
bb2ea04
 
e5b8f1f
 
 
bb2ea04
e5b8f1f
 
632b9a9
 
 
 
 
bb2ea04
 
b1ea096
 
 
 
 
 
 
 
632b9a9
 
 
 
1618d8d
 
 
632b9a9
 
 
 
 
 
1618d8d
632b9a9
bfefbe8
632b9a9
 
 
 
 
 
bb2ea04
4eef335
bb2ea04
bfefbe8
632b9a9
 
 
 
 
 
 
 
5802390
632b9a9
 
 
 
 
bb2ea04
 
 
 
 
 
 
 
 
 
 
632b9a9
 
 
 
 
 
 
 
5802390
632b9a9
 
 
 
 
344694a
bb2ea04
 
 
 
 
 
 
 
 
 
 
632b9a9
 
5802390
 
 
 
 
1618d8d
5802390
 
 
 
 
 
 
8f21503
5802390
 
 
 
04fd40a
080b93d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1618d8d
080b93d
e0e50ea
 
62a0998
c9607c5
080b93d
 
 
 
 
 
 
f523875
 
080b93d
 
 
 
 
 
 
 
 
 
1618d8d
080b93d
59fd861
5fadba9
 
 
 
 
 
 
 
 
 
 
 
dc2d686
 
 
 
 
 
 
 
 
08a1772
 
 
 
 
 
 
 
 
 
 
1b9c64b
08a1772
1b9c64b
 
59fd861
04df73d
c494d38
59fd861
 
 
 
04df73d
59fd861
 
 
 
 
 
1b9c64b
08a1772
59fd861
 
dfb2847
04df73d
 
 
 
dfb2847
1a3f77c
dadbe53
 
aaa5384
 
04df73d
 
 
59fd861
 
 
1b9c64b
08a1772
59fd861
1b9c64b
04df73d
 
1b9c64b
04df73d
 
 
dc2d686
04df73d
1831656
 
04df73d
 
8fa4ac1
1b9c64b
04df73d
 
409183d
 
04df73d
1831656
 
 
 
04df73d
632b9a9
 
 
1618d8d
 
080b93d
5802390
080b93d
 
5802390
 
 
 
 
 
 
 
080b93d
1618d8d
75acc99
 
1618d8d
 
5802390
 
952d06a
1618d8d
 
73600ae
080b93d
 
5802390
080b93d
5802390
080b93d
5802390
080b93d
 
1618d8d
 
 
080b93d
04df73d
25ba4ab
080b93d
 
 
1618d8d
a4d2607
04df73d
5a1fa55
04df73d
 
 
 
 
 
 
e603db5
 
 
04df73d
4e780da
 
a4d2607
 
f24861a
a4d2607
5da5387
 
 
 
9a08b24
633524b
 
 
4e780da
 
 
e603db5
04df73d
 
 
 
8f21503
4e780da
 
 
f3f99e0
 
 
 
 
 
 
 
 
04df73d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
080b93d
 
 
 
 
 
 
 
 
 
 
 
c494d38
04df73d
 
907ad44
080b93d
 
 
 
1618d8d
 
080b93d
75acc99
 
 
 
 
 
 
 
 
 
 
 
 
 
817a182
75acc99
 
 
 
6e8b5f5
75acc99
1618d8d
75acc99
 
 
 
 
 
 
 
 
5802390
080b93d
632b9a9
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
import json
import os
import shutil
from datetime import datetime
from enum import Enum

import pandas as pd
import requests
import yaml
from jsondiff import diff

# The purpose of this script is to automate running a bunch of tests
# This script will take an input folder
# The input folder should contain: 
# 1. A file containing a list of the recipe parameters 
# 2. A file containing the input data for each of the schemas 
# 3. ....

# Steps to do this that we will outline then perform 
# First, get the gold standard JSONs from baserow 
# Next, get the recipe parameter list from the input folder 
# Iterate through the recipe parameter list one at a time
# In the iteration, first fill out a surveystack submission - is this possible with the current surveystack API?
# Next, save the surveystack submission ID (?)
# Use the iteration parameters to then get the three JSONs back from chatgpt 
# Compare the JSONs to the gold standard JSONs 
# Print out the differences in a .csv 
# Print out a side by side of the yaml 
# store all these together 
# continue through iterations 
# create downloadables of the results 

# Baserow API token read from the environment (None when the variable is unset).
# NOTE(review): the request helpers in this file read os.environ['BASEROW_API_KEY']
# directly, and this constant is not referenced elsewhere in this file.
BASEROW_API_KEY = os.getenv("BASEROW_API_KEY")
# Project-local entry point; drive_process() calls it with the prepared spec and
# parses each of its three return values with json.loads.
from process_data import process_specifications

def get_baserow_url(table_id):
    """Build the Baserow "list rows" URL for one table.

    Args:
        table_id: Table identifier; it is interpolated into the URL path as-is.

    Returns:
        The rows endpoint URL with the ``user_field_names=true`` query flag.
    """
    print("GETTING BASEROW URL")
    api_root = "https://baserow.f11804a1.federatedcomputer.net/api"
    rows_path = f"/database/rows/table/{table_id}/"
    return api_root + rows_path + "?user_field_names=true"

def get_baserow_data():
    """Fetch the gold-standard JSONs and the interview inputs from Baserow.

    Reads table 560 and extracts the three rows used as test datasets
    (row id 2 -> "liz_carrot", 3 -> "ben_soybean", 5 -> "wally_squash").

    Returns:
        A ``(gold_standards, input_data)`` tuple of dicts keyed by dataset name.
        ``gold_standards[name]`` has the keys "planting", "interactions" and
        "trials"; ``input_data[name]`` has "raw_interview", "otter_summary" and
        "greg_summary". Every dataset key is always present; if its row is
        absent from the response the fields are ``None`` and a warning is
        printed (previously this raised ``UnboundLocalError``).
        Returns ``None`` when the HTTP request fails; the error is printed.

    Raises:
        KeyError: If the ``BASEROW_API_KEY`` environment variable is not set.
    """
    print("GETTING BASEROW DATA")

    TABLE_ID = "560"

    BASEROW_URL = get_baserow_url(TABLE_ID)

    headers = {
        "Authorization": f"Token {os.environ['BASEROW_API_KEY']}",
        "Content-Type": "application/json"
    }

    # Baserow row id -> dataset name used as the key in both returned dicts.
    dataset_by_row_id = {2: "liz_carrot", 3: "ben_soybean", 5: "wally_squash"}
    # Output key -> Baserow column name.
    gold_fields = {
        "planting": "Plantings and Fields - Gold Standard",
        "interactions": "Interactions - Gold Standard",
        "trials": "Trials - Gold Standard",
    }
    input_fields = {
        "raw_interview": "Raw Interview",
        "otter_summary": "Otter Summary",
        "greg_summary": "Post-Interview Summary",
    }

    print("STARTING TO TRY RESPONSE REQUEST")
    try:
        # A timeout keeps an unresponsive server from hanging the whole run;
        # requests raises a RequestException subclass when it expires.
        response = requests.get(BASEROW_URL, headers=headers, timeout=60)
        print("GOT")
        response.raise_for_status()
        rows = response.json()
    except requests.exceptions.RequestException as e:
        print(f"Failed to fetch rows: {e}")
        return None

    results = rows.get("results", [])

    print("PARSING ROWS NOW")

    # Pre-seed every dataset so the returned shape is stable even if a row
    # is missing from the response.
    gold_standards = {
        name: dict.fromkeys(gold_fields) for name in dataset_by_row_id.values()
    }
    input_data = {
        name: dict.fromkeys(input_fields) for name in dataset_by_row_id.values()
    }

    found = set()
    for row in results:
        print(f"Row ID: {row.get('id')}, Data: {row}")

        name = dataset_by_row_id.get(row.get("id"))
        if name is None:
            continue  # not one of the three test datasets

        found.add(name)
        gold_standards[name] = {
            key: row.get(column) for key, column in gold_fields.items()
        }
        input_data[name] = {
            key: row.get(column) for key, column in input_fields.items()
        }

    for row_id, name in dataset_by_row_id.items():
        if name not in found:
            print(f"WARNING: row {row_id} ({name}) not found in table {TABLE_ID}")

    # How to retrieve this data
    # liz_carrot_planting = gold_standards["liz_carrot"]["planting"]

    print("BASEROW DATA DONE GOT")
    print("GOLD STANDARDS HERE")
    print(gold_standards)
    print("INPUT DATA HERE")
    print(input_data)
    return gold_standards, input_data
    
def _select_value(cell):
    """Return ``cell["value"]`` for a dict-shaped cell, else ``None``."""
    if isinstance(cell, dict):
        return cell.get("value")
    return None


def _first_select_value(cell):
    """Return the ``"value"`` of the first entry of a list-shaped cell, else ``None``."""
    if not cell:  # covers both None and an empty list
        return None
    return _select_value(cell[0])


def get_recipes():
    """Fetch the list of test recipes from Baserow table 589.

    Returns:
        A list with one dict per row, with the keys "recipe_id",
        "testing_strategy_text", "schema_processing_model",
        "pre_processing_strategy", "pre_processing_text",
        "pre_processing_model", "prompting_strategy",
        "plantings_and_fields_prompt", "interactions_prompt" and
        "treatments_prompt". Select-style cells that are missing, ``None`` or
        empty yield ``None`` (previously a present-but-empty cell raised
        ``AttributeError``/``IndexError`` because the ``.get`` default is only
        used when the key is absent).
        Returns ``None`` when the HTTP request fails; the error is printed.

    Raises:
        KeyError: If the ``BASEROW_API_KEY`` environment variable is not set.
    """
    print("GETTING RECIPES FROM BASEROW NOW")

    #TABLE_ID = "588"
    #TABLE_ID = "578"
    #TABLE_ID = "580" This table contains only one row for testing purposes
    TABLE_ID = "589"

    BASEROW_URL = get_baserow_url(TABLE_ID)

    headers = {
        "Authorization": f"Token {os.environ['BASEROW_API_KEY']}",
        "Content-Type": "application/json"
    }

    print("TRYING TO GET A RESPONSE")
    try:
        # Timeout so an unresponsive server cannot hang the run indefinitely.
        response = requests.get(BASEROW_URL, headers=headers, timeout=60)
        response.raise_for_status()
        rows = response.json()
    except requests.exceptions.RequestException as e:
        print(f"Failed to fetch rows: {e}")
        return None

    results = rows.get("results", [])

    my_recipes = []
    print("PARSING ROWS")
    for row in results:
        print(f"Row ID: {row.get('id')}, Data: {row}")

        my_recipes.append({
            "recipe_id": row.get("Recipe ID"),
            "testing_strategy_text": row.get("Testing Strategy for Set"),
            "schema_processing_model": _select_value(row.get("Schema Processing Model")),
            "pre_processing_strategy": _first_select_value(row.get("Pre-Processing Strategy")),
            "pre_processing_text": row.get("Pre-Prompt Text"),
            "pre_processing_model": _select_value(row.get("Preprocessing Model")),
            "prompting_strategy": _first_select_value(row.get("Prompting Strategy")),
            "plantings_and_fields_prompt": row.get("Plantings and Fields Prompting Text"),
            "interactions_prompt": row.get("Interactions Prompting Text"),
            "treatments_prompt": row.get("Treatments Prompting Text"),
        })

    print("FINISHED GETTING THE RECIPE DATA")
    print("RECIPES HERE")
    print(my_recipes)
    return my_recipes

def fill_out_survey(recipe_dict, input_data):
    """Post one recipe/input combination to SurveyStack as a submission.

    Pre-processing is considered enabled when the recipe has a non-``None``
    "pre_processing_text".

    Args:
        recipe_dict: Recipe dict as produced by ``get_recipes``.
        input_data: Value sent as the "onelonginputtext" field.

    Returns:
        The submitted payload dict when the server answers with HTTP 200;
        ``None`` on any other status or on a request error (both are printed).
    """
    print("filling out survey")
    survey_id = "673b4994aef86f0533b3546c"

    base_url = "https://app.surveystack.io/api/submissions"

    pre_processing = recipe_dict.get("pre_processing_text") is not None

    # Only the pre-processing related parameters differ between the two modes.
    if pre_processing:
        parameters = {
            "modelversion": recipe_dict["schema_processing_model"],
            "preprocessdata": ["yes"],
            "promptstyle": recipe_dict["prompting_strategy"],
            # Fix: this used to send the prompting strategy as the model version.
            "preprocessmodelversion": recipe_dict["pre_processing_model"],
            "multiplepreprompts": "no",
            "prepromptstyle": recipe_dict["pre_processing_strategy"],
            "preprocessingprompt1": recipe_dict["pre_processing_text"],
            "preprocessingprompt2": "",
            "preprocessingprompt3": ""
        }
    else:
        parameters = {
            "modelversion": recipe_dict["schema_processing_model"],
            "preprocessdata": ["no"],
            "promptstyle": recipe_dict["prompting_strategy"],
            "preprocessmodelversion": None,
            "multiplepreprompts": "no",
            "prepromptstyle": None,
            "preprocessingprompt1": None,
            "preprocessingprompt2": None,
            "preprocessingprompt3": None
        }

    submission_data = {
        "survey": survey_id,
        "data": {
            "inputstyle": "big-block-input-text",
            "onelonginputtext": input_data,
            "schema_prompt": {
                "firstschemaprompt": recipe_dict["plantings_and_fields_prompt"],
                "secondschemaprompt": recipe_dict["interactions_prompt"],
                "thirdschemaprompt": recipe_dict["treatments_prompt"],
            },
        },
        "parameters": parameters,
    }

    headers = {
        "Content-Type": "application/json",
    }

    print("GETTING SURVEY RESPONSE")
    try:
        # Timeout so an unresponsive server cannot hang the run indefinitely.
        response = requests.post(
            base_url, headers=headers, data=json.dumps(submission_data), timeout=60
        )
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"An error occurred while submitting the data: {e}")
        return None

    if response.status_code == 200:
        print("Submission successful to SurveyStack!")
        print(response.json())
        return submission_data

    # Reached for non-error statuses other than 200.
    print(f"Failed to submit: {response.status_code} - {response.text}")
    return None

def get_data_ready(recipe_dict, input_data_piece):
    """Assemble the specification dict handed to the schema-processing step.

    Args:
        recipe_dict: Recipe dict with the keys "plantings_and_fields_prompt",
            "interactions_prompt", "treatments_prompt",
            "schema_processing_model", "prompting_strategy",
            "pre_processing_strategy", "pre_processing_model" and
            "pre_processing_text".
        input_data_piece: The input text to process; stored under "input_text".

    Returns:
        A dict with the keys "prompts", "inputstyle", "input_text" and
        "parameters". Pre-processing is switched off only when the strategy is
        the string "None" and the model is "No preprocessing"; otherwise the
        pre-processing parameters are filled in from the recipe.
    """
    print("GETTING DATA READY")

    prompts = {
        "firstschemaprompt": recipe_dict["plantings_and_fields_prompt"],
        "secondschemaprompt": recipe_dict["interactions_prompt"],
        "thirdschemaprompt": recipe_dict["treatments_prompt"],
    }

    parameters = {
        "modelversion": recipe_dict["schema_processing_model"],
        "promptstyle": recipe_dict["prompting_strategy"],
    }

    skip_preprocessing = (
        recipe_dict["pre_processing_strategy"] == "None"
        and recipe_dict["pre_processing_model"] == "No preprocessing"
    )
    if skip_preprocessing:
        parameters["preprocessdata"] = "no"
    else:
        parameters.update({
            "preprocessdata": "yes",
            "preprocessmodelversion": recipe_dict["pre_processing_model"],
            "multiplepreprompts": "no",
            "prepromptstyle": recipe_dict["pre_processing_strategy"],
            "preprocessingprompt1": recipe_dict["pre_processing_text"],
            "preprocessingprompt2": "",
            "preprocessingprompt3": "",
        })

    processed_data = {
        "prompts": prompts,
        "inputstyle": 'big-block-input-text',
        "input_text": input_data_piece,
        "parameters": parameters,
    }

    print("DID THAT NOW")
    return processed_data

def format_json(json_data, truncate_length=500):
    """Pretty-print a JSON string and cap its length.

    Args:
        json_data: Text that may or may not be valid JSON.
        truncate_length: Maximum number of characters kept before "..." is
            appended.

    Returns:
        The JSON re-serialised with a two-space indent when ``json_data``
        parses; otherwise ``json_data`` itself. In both cases the text is cut
        to ``truncate_length`` characters plus "..." when it is longer.
    """
    try:
        text = json.dumps(json.loads(json_data), indent=2)
    except json.JSONDecodeError:
        # Not valid JSON: fall back to the raw text.
        text = json_data

    if len(text) > truncate_length:
        return text[:truncate_length] + "..."
    return text

# Fallback serializer for objects json.dumps cannot encode natively
def custom_serializer(obj):
    """Convert a non-JSON-native object into something ``json.dumps`` can encode.

    Intended for use as the ``default=`` argument of ``json.dumps``.

    The original version tested ``isinstance(obj, Soil)`` / ``isinstance(obj,
    Yield)``, but neither type is imported in this file, so every call raised
    ``NameError``. Both branches only called ``obj.to_dict()``, so any object
    exposing a callable ``to_dict`` is now handled the same way.

    Args:
        obj: The object that could not be serialised.

    Returns:
        The member name for an ``Enum``, the result of ``obj.to_dict()`` when
        that method exists, otherwise ``obj.__dict__``.

    Raises:
        AttributeError: If ``obj`` has neither ``to_dict`` nor ``__dict__``.
    """
    if isinstance(obj, Enum):
        return obj.name  # Or obj.value, depending on what you need

    to_dict = getattr(obj, "to_dict", None)
    if callable(to_dict):
        return to_dict()

    return obj.__dict__  # Default case: attribute dict of other custom objects

def sanitize_json_for_yaml(data):
    """Recursively replace tuples with lists in a nested dict/list structure.

    Tuple elements are sanitised as well; the earlier version converted a
    tuple with ``list(data)`` without descending into it, so tuples or dicts
    nested inside a tuple were left untouched.

    Args:
        data: Any value; dicts, lists and tuples are walked recursively.

    Returns:
        A new structure in which every dict and list is rebuilt and every
        tuple has become a list. Other values are returned unchanged.
    """
    if isinstance(data, dict):
        return {key: sanitize_json_for_yaml(value) for key, value in data.items()}
    if isinstance(data, (list, tuple)):
        return [sanitize_json_for_yaml(item) for item in data]
    return data  # Keep other types as-is

def _markdown_table_cell(value):
    """Render *value* as text that stays inside a single Markdown table cell."""
    text = str(value)
    # A raw "|" ends the cell and a newline ends the table row.
    return text.replace("|", "\\|").replace("\r\n", "<br>").replace("\n", "<br>")


def generate_markdown_output(df):
    """Render the comparison results in ``df`` as one Markdown document.

    The document has six sections: input transcript, recipe fields,
    differences, prompts, and side-by-side gold/machine output as JSON and
    as YAML.

    Changes from the earlier version:
      * table cells are escaped, so multi-line prompts or text containing
        "|" no longer break the Markdown tables;
      * a non-string transcript is converted with ``str()`` before being
        truncated instead of failing on ``len()``/slicing or being dumped
        in full;
      * the text is assembled with ``"".join`` instead of repeated ``+=``.

    Args:
        df: DataFrame with the columns Recipe_ID, Testing_Strategy_Text,
            Schema_Processing_Model, Pre_Processing_Strategy,
            Pre_Processing_Text, Pre_Processing_Model, Prompting_Strategy,
            Plantings_and_Fields_Prompt, Interactions_Prompt,
            Treatments_Prompt, Input_Transcript, Gold_Standard_JSON,
            Machine_Generated_JSON and Differences. The last three hold dicts.

    Returns:
        The Markdown text.
    """
    section_keys = ("planting", "interactions", "trials")
    rows = [row for _, row in df.iterrows()]
    parts = []

    # 1. Input Transcript
    parts.append("\n## Input Transcript\n")
    for row in rows:
        transcript = row['Input_Transcript']
        if not isinstance(transcript, str):
            transcript = str(transcript)
        if len(transcript) > 500:
            transcript = transcript[:500] + "..."
        parts.append(f"**Recipe ID {row['Recipe_ID']}**:\n```\n{transcript}\n```\n\n")

    # 2. Recipe Fields
    parts.append("\n## Recipe Fields\n")
    recipe_columns = [
        "Recipe ID", "Testing Strategy", "Schema Processing Model", "Pre-Processing Strategy",
        "Pre-Processing Text", "Pre-Processing Model", "Prompting Strategy"
    ]
    recipe_fields = [
        "Recipe_ID", "Testing_Strategy_Text", "Schema_Processing_Model",
        "Pre_Processing_Strategy", "Pre_Processing_Text", "Pre_Processing_Model",
        "Prompting_Strategy",
    ]
    parts.append("| " + " | ".join(recipe_columns) + " |\n")
    parts.append("| " + " | ".join("-" * len(col) for col in recipe_columns) + " |\n")
    for row in rows:
        cells = [_markdown_table_cell(row[field]) for field in recipe_fields]
        parts.append("| " + " | ".join(cells) + " |\n")
    parts.append("\n")

    # 3. Differences
    parts.append("\n## Differences\n")
    for row in rows:
        parts.append(f"\n### Recipe ID: {row['Recipe_ID']}\n")
        for key, value in row['Differences'].items():
            parts.append(f"#### {key.capitalize()}\n")
            parts.extend(f" - {item}\n" for item in value)

    # 4. Prompts
    parts.append("\n## Prompts\n")
    prompt_columns = ["Plantings and Fields Prompt", "Interactions Prompt", "Treatments Prompt"]
    prompt_fields = ["Plantings_and_Fields_Prompt", "Interactions_Prompt", "Treatments_Prompt"]
    parts.append("| " + " | ".join(prompt_columns) + " |\n")
    parts.append("| " + " | ".join("-" * len(col) for col in prompt_columns) + " |\n")
    for row in rows:
        cells = [_markdown_table_cell(row[field]) for field in prompt_fields]
        parts.append("| " + " | ".join(cells) + " |\n")
    parts.append("\n")

    # 5. Side-by-Side JSON Comparisons
    parts.append("\n## Gold Standard vs Machine Generated JSON\n")
    for row in rows:
        parts.append(f"\n### Recipe ID: {row['Recipe_ID']}\n")
        for key in section_keys:
            gold = json.dumps(row['Gold_Standard_JSON'].get(key, {}), indent=2)
            machine = json.dumps(row['Machine_Generated_JSON'].get(key, {}), default=custom_serializer, indent=2)
            parts.append(f"#### {key.capitalize()}\n")
            parts.append(f"**Gold Standard JSON**:\n```json\n{gold}\n```\n")
            parts.append(f"**Machine Generated JSON**:\n```json\n{machine}\n```\n")

    # 6. Side-by-Side YAML Comparisons
    parts.append("\n## Gold Standard vs Machine Generated YAML\n")
    for row in rows:
        parts.append(f"\n### Recipe ID: {row['Recipe_ID']}\n")
        for key in section_keys:
            gold = yaml.dump(row['Gold_Standard_JSON'].get(key, {}), default_flow_style=False, sort_keys=True)
            machine = yaml.dump(row['Machine_Generated_JSON'].get(key, {}), default_flow_style=False, sort_keys=True)
            parts.append(f"#### {key.capitalize()}\n")
            parts.append(f"**Gold Standard YAML**:\n```yaml\n{gold}\n```\n")
            parts.append(f"**Machine Generated YAML**:\n```yaml\n{machine}\n```\n")

    return "".join(parts)

    
def _select_input_text(recipe_dict, input_chunks):
    """Pick the input variant that matches the recipe's pre-processing strategy."""
    strategy = recipe_dict["pre_processing_strategy"]
    if strategy == "Otter.ai Summary":
        return input_chunks["otter_summary"]
    if strategy == "Greg Summary":
        return input_chunks["greg_summary"]
    return input_chunks["raw_interview"]


def drive_process():
    """Run every recipe against every dataset and package the results.

    For each (recipe, dataset) pair this prepares the specification, calls
    ``process_specifications``, diffs the three generated JSON documents
    against the gold standards and writes a Markdown report plus gold,
    generated and differences JSON files into a per-recipe folder. The output
    folder is then zipped and removed.

    Fixes over the earlier version:
      * the differences file now contains the differences (it used to
        contain its own file path);
      * the report's input transcript is the text actually sent for
        processing, not the dict holding all three input variants;
      * a failed Baserow fetch raises a clear ``RuntimeError`` instead of an
        opaque unpacking ``TypeError``;
      * files are written as UTF-8 regardless of the platform default;
      * an unreachable second ``return`` was removed.

    Returns:
        The name of the zip archive that was created
        (``output_results_<timestamp>.zip``).

    Raises:
        RuntimeError: If the gold standards/input data or the recipes could
            not be fetched from Baserow.
    """
    print("We are starting to DRIVE PROCESS")

    # Get the data from baserow (gold standards JSON and Input data)
    baserow_data = get_baserow_data()
    if baserow_data is None:
        raise RuntimeError("Could not fetch gold standards and input data from Baserow")
    gold_standards, input_data = baserow_data

    # Get the recipes from baserow too
    my_recipes = get_recipes()
    if my_recipes is None:
        raise RuntimeError("Could not fetch recipes from Baserow")

    print("Making the OUTPUT STUFF")
    output_folder = "output_results_" + datetime.now().strftime("%Y%m%d_%H%M%S")
    os.makedirs(output_folder, exist_ok=True)

    print("GOING THROUGH RECIPES NOW")
    for recipe_dict in my_recipes:
        # key is the dataset name (liz_carrot, ben_soybean, wally_squash);
        # input_chunks holds its raw_interview / otter_summary / greg_summary.
        for key, input_chunks in input_data.items():
            print("RECIPE INFO")
            print(key)
            print(recipe_dict["recipe_id"])

            # Get the input data based on the recipe
            input_data_piece = _select_input_text(recipe_dict, input_chunks)

            print("DECIDED INPUT DATA")
            print(input_data_piece)

            # NOTE: the SurveyStack submission (fill_out_survey) is deliberately
            # not performed here; the original author noted it is not accepted.

            # Prepare the data for the structured output setup
            proc_spec = get_data_ready(recipe_dict, input_data_piece)

            print("Gold Standard")
            # Gold standards are stored as JSON text, one per schema.
            gold_raw = gold_standards[key]
            gold_standard_planting_json = json.loads(gold_raw["planting"])
            gold_standard_interactions_json = json.loads(gold_raw["interactions"])
            gold_standard_trials_json = json.loads(gold_raw["trials"])

            print("Gold standard json after loading")
            print(gold_standard_planting_json)

            print("PROCESSING SPECIFICATIONS!!!!!!!!!!!!!!!")
            raw_farm_activity, raw_interactions, raw_trials = process_specifications(proc_spec)

            processed_farm_activity_json = json.loads(raw_farm_activity)
            processed_interactions_json = json.loads(raw_interactions)
            processed_trials_json = json.loads(raw_trials)

            print("Processed and loaded 1st json from machine gen")
            print(processed_farm_activity_json)

            # Compare the generated JSON to the gold standard.
            # NOTE(review): list() over a dict-shaped diff keeps only its
            # top-level keys; kept as-is to preserve the report format.
            json_diff = {
                "planting": list(diff(gold_standard_planting_json, processed_farm_activity_json)),
                "interactions": list(diff(gold_standard_interactions_json, processed_interactions_json)),
                "trials": list(diff(gold_standard_trials_json, processed_trials_json)),
            }

            print("Diff planting")
            print(json_diff["planting"])

            # Convert to yaml-friendly structures
            gold_standard_json = {
                "planting": sanitize_json_for_yaml(gold_standard_planting_json),
                "interactions": sanitize_json_for_yaml(gold_standard_interactions_json),
                "trials": sanitize_json_for_yaml(gold_standard_trials_json),
            }

            comparison_json = {
                "planting": sanitize_json_for_yaml(processed_farm_activity_json),
                "interactions": sanitize_json_for_yaml(processed_interactions_json),
                "trials": sanitize_json_for_yaml(processed_trials_json),
            }

            # One report per (recipe, dataset) pair, hence a single-row frame.
            output_rows = [{
                "Recipe_ID": recipe_dict.get("recipe_id", "N/A"),
                "Testing_Strategy_Text": recipe_dict.get("testing_strategy_text", "N/A"),
                "Schema_Processing_Model": recipe_dict.get("schema_processing_model", "N/A"),
                "Pre_Processing_Strategy": recipe_dict.get("pre_processing_strategy", "N/A"),
                "Pre_Processing_Text": recipe_dict.get("pre_processing_text", "N/A"),
                "Pre_Processing_Model": recipe_dict.get("pre_processing_model", "N/A"),
                "Prompting_Strategy": recipe_dict.get("prompting_strategy", "N/A"),
                "Plantings_and_Fields_Prompt": recipe_dict.get("plantings_and_fields_prompt", "N/A"),
                "Interactions_Prompt": recipe_dict.get("interactions_prompt", "N/A"),
                "Treatments_Prompt": recipe_dict.get("treatments_prompt", "N/A"),
                # Fix: record the text that was actually processed.
                "Input_Transcript": input_data_piece,
                "Gold_Standard_JSON": gold_standard_json,
                "Machine_Generated_JSON": comparison_json,
                "Differences": json_diff
            }]

            df = pd.DataFrame(output_rows)

            print("dataframe done now onto markdown")

            markdown_output = generate_markdown_output(df)
            recipe_folder = os.path.join(output_folder, f"recipe_{recipe_dict['recipe_id']}")
            os.makedirs(recipe_folder, exist_ok=True)

            file_prefix = os.path.join(
                recipe_folder, f"recipe_{recipe_dict['recipe_id']}_data_{key}"
            )

            # Save markdown to file
            with open(f"{file_prefix}_output.md", 'w', encoding="utf-8") as f:
                f.write(markdown_output)

            # Save JSON files
            with open(f"{file_prefix}_gold_standard.json", 'w', encoding="utf-8") as f:
                json.dump(gold_standard_json, f, indent=2)
            with open(f"{file_prefix}_generated.json", 'w', encoding="utf-8") as f:
                json.dump(comparison_json, f, indent=2)

            # Save the differences as a separate file. default=str covers
            # diff entries that are not plain JSON types.
            with open(f"{file_prefix}_differences.json", 'w', encoding="utf-8") as f:
                json.dump(json_diff, f, indent=2, default=str)

    print("ZIPPING UP WHOLE THING")
    # Zip the entire output folder
    zip_filename = f"{output_folder}.zip"
    shutil.make_archive(output_folder, 'zip', output_folder)

    # Cleanup by removing the unzipped folder after zipping it
    shutil.rmtree(output_folder)

    # Return the zip file for downloading
    return zip_filename