Update script_for_automation.py
Browse files- script_for_automation.py +25 -31
script_for_automation.py
CHANGED
|
@@ -330,84 +330,78 @@ def get_data_ready(recipe_dict, input_data_piece):
|
|
| 330 |
print("DID THAT NOW")
|
| 331 |
return processed_data
|
| 332 |
|
|
|
|
|
|
|
|
|
|
| 333 |
def generate_markdown_output(df):
|
| 334 |
# Start the markdown output string
|
| 335 |
markdown = ""
|
| 336 |
|
| 337 |
-
#
|
| 338 |
-
markdown += "###
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 339 |
markdown += "| Recipe ID | Testing Strategy | Schema Processing Model | Pre-Processing Strategy | Pre-Processing Text | Pre-Processing Model | Prompting Strategy |\n"
|
| 340 |
markdown += "|-----------|------------------|-------------------------|--------------------------|---------------------|----------------------|-------------------|\n"
|
| 341 |
|
| 342 |
-
# Iterate over rows to create the first table (recipe_id to prompting_strategy)
|
| 343 |
for _, row in df.iterrows():
|
| 344 |
-
# Ensure all fields have a consistent length (pad with empty string if None)
|
| 345 |
markdown += f"| {str(row['Recipe_ID']).ljust(10)} | {str(row['Testing_Strategy_Text']).ljust(20)} | {str(row['Schema_Processing_Model']).ljust(25)} | {str(row['Pre_Processing_Strategy']).ljust(23)} | {str(row['Pre_Processing_Text']).ljust(20)} | {str(row['Pre_Processing_Model']).ljust(20)} | {str(row['Prompting_Strategy']).ljust(25)} |\n"
|
| 346 |
|
| 347 |
-
#
|
| 348 |
markdown += "\n### Prompts\n"
|
| 349 |
markdown += "| Plantings and Fields Prompt | Interactions Prompt | Treatments Prompt |\n"
|
| 350 |
markdown += "|-----------------------------|---------------------|-------------------|\n"
|
| 351 |
|
| 352 |
-
# Iterate over rows to create the second table (plantings_and_fields_prompt, interactions_prompt, treatments_prompt)
|
| 353 |
for _, row in df.iterrows():
|
| 354 |
-
# Ensure all fields are of consistent length
|
| 355 |
markdown += f"| {str(row['Plantings_and_Fields_Prompt']).ljust(30)} | {str(row['Interactions_Prompt']).ljust(20)} | {str(row['Treatments_Prompt']).ljust(20)} |\n"
|
| 356 |
|
| 357 |
-
#
|
| 358 |
-
markdown += "\n### Input Transcript\n"
|
| 359 |
-
markdown += "Since the input transcript might be very long, it is truncated here for readability:\n"
|
| 360 |
-
|
| 361 |
-
# Display a truncated version of the input transcript to avoid long text in the table
|
| 362 |
-
for _, row in df.iterrows():
|
| 363 |
-
truncated_input = (row['Input_Transcript'][:500] + '...') if len(row['Input_Transcript']) > 500 else row['Input_Transcript']
|
| 364 |
-
markdown += f"**Recipe ID {row['Recipe_ID']}**: {truncated_input}\n\n"
|
| 365 |
-
|
| 366 |
-
# Side-by-side comparison of Gold Standard and Machine Generated Key-Values
|
| 367 |
markdown += "\n### Gold Standard vs Machine Generated Key-Values\n"
|
| 368 |
markdown += "| Key | Gold Standard | Machine Generated |\n"
|
| 369 |
markdown += "|-----|---------------|-------------------|\n"
|
| 370 |
|
| 371 |
-
# Iterate over rows to create the comparison table
|
| 372 |
for _, row in df.iterrows():
|
| 373 |
markdown += f"| {str(row['Recipe_ID']).ljust(10)} | {str(row['Gold_Standard_Key_Values']).ljust(25)} | {str(row['Machine_Generated_Key_Values']).ljust(25)} |\n"
|
| 374 |
|
| 375 |
-
#
|
| 376 |
markdown += "\n### Differences\n"
|
| 377 |
-
markdown += "The following differences were found between the gold standard and the machine-generated output:\n"
|
| 378 |
markdown += "| Key | Difference |\n"
|
| 379 |
markdown += "|-----|------------|\n"
|
| 380 |
|
| 381 |
for _, row in df.iterrows():
|
| 382 |
-
# Assuming 'Differences' is a list of dictionaries with keys and changes
|
| 383 |
differences = row['Differences']
|
| 384 |
if isinstance(differences, list):
|
| 385 |
for diff in differences:
|
| 386 |
-
# Ensure that diff has a 'values_changed' key
|
| 387 |
if isinstance(diff, dict) and 'values_changed' in diff:
|
| 388 |
for path, change in diff['values_changed'].items():
|
| 389 |
-
# Ensure we have both 'old_value' and 'new_value'
|
| 390 |
if 'old_value' in change and 'new_value' in change:
|
| 391 |
markdown += f"| {str(path).ljust(20)} | {str(change['old_value']).ljust(20)} -> {str(change['new_value']).ljust(20)} |\n"
|
| 392 |
else:
|
| 393 |
-
|
| 394 |
else:
|
| 395 |
-
|
| 396 |
else:
|
| 397 |
-
|
| 398 |
|
| 399 |
-
#
|
| 400 |
markdown += "\n### Gold Standard vs Machine Generated YAML\n"
|
| 401 |
markdown += "| Gold Standard YAML | Machine Generated YAML |\n"
|
| 402 |
markdown += "|--------------------|------------------------|\n"
|
| 403 |
|
| 404 |
-
# Add the side-by-side YAML comparison
|
| 405 |
for _, row in df.iterrows():
|
| 406 |
-
|
|
|
|
|
|
|
| 407 |
|
| 408 |
return markdown
|
| 409 |
|
| 410 |
-
|
| 411 |
def drive_process():
|
| 412 |
# this is to drive the processing process
|
| 413 |
print("We are starting to DRIVE PROCESS")
|
|
@@ -481,7 +475,7 @@ def drive_process():
|
|
| 481 |
"Plantings_and_Fields_Prompt": recipe_dict.get("plantings_and_fields_prompt", "N/A"),
|
| 482 |
"Interactions_Prompt": recipe_dict.get("interactions_prompt", "N/A"),
|
| 483 |
"Treatments_Prompt": recipe_dict.get("treatments_prompt", "N/A"),
|
| 484 |
-
"Input_Transcript":
|
| 485 |
"Gold_Standard_Key_Values": gold_standard_json,
|
| 486 |
"Machine_Generated_Key_Values": completed_json,
|
| 487 |
"Differences": differences,
|
|
|
|
| 330 |
print("DID THAT NOW")
|
| 331 |
return processed_data
|
| 332 |
|
| 333 |
+
import yaml
|
| 334 |
+
import json
|
| 335 |
+
|
| 336 |
def generate_markdown_output(df):
    """Render the evaluation results in *df* as a single Markdown report.

    The report contains, in order: the (truncated) input transcript per
    recipe, the recipe fields table, the prompts table, the gold-standard vs
    machine-generated key-values table, the differences table, and a
    side-by-side YAML comparison table.

    Args:
        df: DataFrame with one row per recipe run. The columns
            ``Recipe_ID``, ``Testing_Strategy_Text``,
            ``Schema_Processing_Model``, ``Pre_Processing_Strategy``,
            ``Pre_Processing_Text``, ``Pre_Processing_Model``,
            ``Prompting_Strategy``, ``Plantings_and_Fields_Prompt``,
            ``Interactions_Prompt``, ``Treatments_Prompt``,
            ``Input_Transcript``, ``Gold_Standard_Key_Values``,
            ``Machine_Generated_Key_Values`` and ``Differences`` are
            required. ``Gold_Standard_YAML`` and ``Machine_Generated_YAML``
            are rendered when present; otherwise a placeholder is shown.

    Returns:
        The Markdown document as a string.

    Raises:
        KeyError: if one of the required columns is missing.
    """
    # Collect fragments and join once instead of repeated string "+=".
    parts = []

    # Input Transcript Section
    parts.append("### Input Transcript\n")
    parts.append(
        "Since the input transcript might be very long, it is truncated here for readability:\n\n"
    )
    for _, row in df.iterrows():
        truncated_input = _truncate_transcript(row['Input_Transcript'])
        parts.append(f"**Recipe ID {row['Recipe_ID']}**:\n\n{truncated_input}\n\n")

    # Recipe Fields Section
    parts.append("\n### Recipe Fields (Basic Information)\n")
    parts.append("| Recipe ID | Testing Strategy | Schema Processing Model | Pre-Processing Strategy | Pre-Processing Text | Pre-Processing Model | Prompting Strategy |\n")
    parts.append("|-----------|------------------|-------------------------|--------------------------|---------------------|----------------------|-------------------|\n")
    recipe_columns = (
        ('Recipe_ID', 10),
        ('Testing_Strategy_Text', 20),
        ('Schema_Processing_Model', 25),
        ('Pre_Processing_Strategy', 23),
        ('Pre_Processing_Text', 20),
        ('Pre_Processing_Model', 20),
        ('Prompting_Strategy', 25),
    )
    for _, row in df.iterrows():
        parts.append(_table_row(row, recipe_columns))

    # Prompts Section
    parts.append("\n### Prompts\n")
    parts.append("| Plantings and Fields Prompt | Interactions Prompt | Treatments Prompt |\n")
    parts.append("|-----------------------------|---------------------|-------------------|\n")
    prompt_columns = (
        ('Plantings_and_Fields_Prompt', 30),
        ('Interactions_Prompt', 20),
        ('Treatments_Prompt', 20),
    )
    for _, row in df.iterrows():
        parts.append(_table_row(row, prompt_columns))

    # Side-by-Side Comparison
    parts.append("\n### Gold Standard vs Machine Generated Key-Values\n")
    parts.append("| Key | Gold Standard | Machine Generated |\n")
    parts.append("|-----|---------------|-------------------|\n")
    key_value_columns = (
        ('Recipe_ID', 10),
        ('Gold_Standard_Key_Values', 25),
        ('Machine_Generated_Key_Values', 25),
    )
    for _, row in df.iterrows():
        parts.append(_table_row(row, key_value_columns))

    # Differences Section
    parts.append("\n### Differences\n")
    parts.append(
        "The following differences were found between the gold standard and the machine-generated output:\n\n"
    )
    parts.append("| Key | Difference |\n")
    parts.append("|-----|------------|\n")
    for _, row in df.iterrows():
        parts.extend(_difference_rows(row['Differences']))

    # YAML Comparison Section
    parts.append("\n### Gold Standard vs Machine Generated YAML\n")
    parts.append("| Gold Standard YAML | Machine Generated YAML |\n")
    parts.append("|--------------------|------------------------|\n")
    for _, row in df.iterrows():
        # Series.get returns None when the column is absent, so a frame
        # without the YAML columns still yields a well-formed report.
        gold_cell = _yaml_cell(row.get('Gold_Standard_YAML'))
        machine_cell = _yaml_cell(row.get('Machine_Generated_YAML'))
        parts.append(f"| {gold_cell} | {machine_cell} |\n")

    return "".join(parts)


def _md_cell(value, width=0):
    """Format *value* as a single-line Markdown table cell padded to *width*.

    Pipes are backslash-escaped and line breaks become ``<br>`` so that a
    value can never terminate the cell or the table row early.
    """
    text = str(value).replace("|", "\\|")
    text = text.replace("\r\n", "\n").replace("\r", "\n").replace("\n", "<br>")
    return text.ljust(width)


def _table_row(row, columns):
    """Build one Markdown table line from ``(column_name, width)`` pairs."""
    cells = " | ".join(_md_cell(row[name], width) for name, width in columns)
    return f"| {cells} |\n"


def _truncate_transcript(transcript, limit=500):
    """Return *transcript* as text, cut to *limit* characters plus '...'.

    NOTE(review): the caller appears to store ``input_chunks`` here, which may
    be a list of chunks rather than a string -- confirm. Sequences are joined
    with spaces so slicing and concatenation always operate on ``str``.
    """
    if transcript is None:
        return ""
    if isinstance(transcript, (list, tuple)):
        transcript = " ".join(str(chunk) for chunk in transcript)
    elif not isinstance(transcript, str):
        transcript = str(transcript)
    if len(transcript) > limit:
        return transcript[:limit] + '...'
    return transcript


def _difference_rows(differences):
    """Return the Markdown table lines describing one row's differences."""
    # A single diff mapping that carries 'values_changed' is treated like a
    # one-element list instead of being reported as "no differences".
    if isinstance(differences, dict) and 'values_changed' in differences:
        differences = [differences]
    if not isinstance(differences, list) or not differences:
        return ["| (No differences) | |\n"]

    lines = []
    for diff in differences:
        if not (isinstance(diff, dict) and 'values_changed' in diff):
            lines.append("| (Invalid diff) | |\n")
            continue
        for path, change in diff['values_changed'].items():
            if 'old_value' in change and 'new_value' in change:
                lines.append(
                    f"| {_md_cell(path, 20)} | {_md_cell(change['old_value'], 20)}"
                    f" -> {_md_cell(change['new_value'], 20)} |\n"
                )
            else:
                lines.append(f"| {_md_cell(path, 20)} | (Missing old/new value) |\n")
    return lines


def _yaml_cell(raw):
    """Render a YAML document as a single-line ``<pre>`` table cell.

    Fenced code blocks cannot live inside a Markdown table row because a row
    must be a single line, so the normalized YAML is emitted as inline HTML
    with ``<br>`` line breaks instead.
    """
    import html  # local import: only needed for this cell renderer

    # None or NaN means the value is missing for this row.
    if raw is None or (isinstance(raw, float) and raw != raw):
        return "(not available)"

    if isinstance(raw, str):
        try:
            text = yaml.dump(yaml.safe_load(raw), default_flow_style=False)
        except yaml.YAMLError:
            # Malformed YAML is shown verbatim rather than aborting the whole
            # report.
            text = raw
    else:
        # Already-parsed data (dict/list) is dumped directly.
        text = yaml.dump(raw, default_flow_style=False)

    escaped = html.escape(text.rstrip("\n"), quote=False)
    escaped = escaped.replace("|", "&#124;")
    escaped = escaped.replace("\r\n", "\n").replace("\r", "\n").replace("\n", "<br>")
    return f"<pre>{escaped}</pre>"
|
| 404 |
|
|
|
|
| 405 |
def drive_process():
|
| 406 |
# this is to drive the processing process
|
| 407 |
print("We are starting to DRIVE PROCESS")
|
|
|
|
| 475 |
"Plantings_and_Fields_Prompt": recipe_dict.get("plantings_and_fields_prompt", "N/A"),
|
| 476 |
"Interactions_Prompt": recipe_dict.get("interactions_prompt", "N/A"),
|
| 477 |
"Treatments_Prompt": recipe_dict.get("treatments_prompt", "N/A"),
|
| 478 |
+
"Input_Transcript": input_chunks,
|
| 479 |
"Gold_Standard_Key_Values": gold_standard_json,
|
| 480 |
"Machine_Generated_Key_Values": completed_json,
|
| 481 |
"Differences": differences,
|