Update script_for_automation.py
Browse files- script_for_automation.py +44 -45
script_for_automation.py
CHANGED
|
@@ -329,52 +329,51 @@ def get_data_ready(recipe_dict, input_data_piece):
|
|
| 329 |
|
| 330 |
print("DID THAT NOW")
|
| 331 |
return processed_data
|
| 332 |
-
|
| 333 |
-
import yaml
|
| 334 |
-
import json
|
| 335 |
|
| 336 |
def generate_markdown_output(df):
|
| 337 |
# Start the markdown output string
|
| 338 |
markdown = ""
|
| 339 |
|
| 340 |
-
# Input Transcript
|
| 341 |
-
markdown += "##
|
| 342 |
-
markdown += "Since the input transcript might be very long, it is truncated here for readability:\n\n"
|
| 343 |
-
|
| 344 |
for _, row in df.iterrows():
|
| 345 |
-
truncated_input =
|
| 346 |
-
markdown += f"**Recipe ID {row['Recipe_ID']}**:\n\n{truncated_input}\n\n"
|
| 347 |
-
|
| 348 |
-
# Recipe Fields
|
| 349 |
-
markdown += "
|
| 350 |
-
|
| 351 |
-
|
| 352 |
-
|
|
|
|
|
|
|
|
|
|
| 353 |
for _, row in df.iterrows():
|
| 354 |
-
|
| 355 |
-
|
| 356 |
-
|
| 357 |
-
|
| 358 |
-
markdown += "
|
| 359 |
-
|
| 360 |
-
|
|
|
|
| 361 |
for _, row in df.iterrows():
|
| 362 |
-
|
|
|
|
| 363 |
|
| 364 |
-
# Side-by-Side
|
| 365 |
-
markdown += "
|
| 366 |
markdown += "| Key | Gold Standard | Machine Generated |\n"
|
| 367 |
markdown += "|-----|---------------|-------------------|\n"
|
| 368 |
-
|
| 369 |
for _, row in df.iterrows():
|
| 370 |
-
markdown += f"| {
|
|
|
|
| 371 |
|
| 372 |
-
#
|
| 373 |
-
markdown += "
|
| 374 |
-
markdown += "The following differences were found between the gold standard and the machine-generated output:\n\n"
|
| 375 |
markdown += "| Key | Difference |\n"
|
| 376 |
markdown += "|-----|------------|\n"
|
| 377 |
-
|
| 378 |
for _, row in df.iterrows():
|
| 379 |
differences = row['Differences']
|
| 380 |
if isinstance(differences, list):
|
|
@@ -382,24 +381,24 @@ def generate_markdown_output(df):
|
|
| 382 |
if isinstance(diff, dict) and 'values_changed' in diff:
|
| 383 |
for path, change in diff['values_changed'].items():
|
| 384 |
if 'old_value' in change and 'new_value' in change:
|
| 385 |
-
markdown += f"| {
|
| 386 |
-
else:
|
| 387 |
-
markdown += f"| {str(path).ljust(20)} | (Missing old/new value) |\n"
|
| 388 |
-
else:
|
| 389 |
-
markdown += f"| (Invalid diff) | |\n"
|
| 390 |
else:
|
| 391 |
-
markdown +=
|
| 392 |
-
|
| 393 |
-
# YAML Comparison Section
|
| 394 |
-
markdown += "\n### Gold Standard vs Machine Generated YAML\n"
|
| 395 |
-
markdown += "| Gold Standard YAML | Machine Generated YAML |\n"
|
| 396 |
-
markdown += "|--------------------|------------------------|\n"
|
| 397 |
|
|
|
|
|
|
|
| 398 |
for _, row in df.iterrows():
|
| 399 |
-
gold_yaml = yaml.
|
| 400 |
-
machine_yaml = yaml.
|
| 401 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 402 |
|
|
|
|
|
|
|
| 403 |
return markdown
|
| 404 |
|
| 405 |
def drive_process():
|
|
|
|
| 329 |
|
| 330 |
print("DID THAT NOW")
|
| 331 |
return processed_data
|
| 332 |
+
|
|
|
|
|
|
|
| 333 |
|
| 334 |
# Maximum number of transcript characters shown before truncating with "...".
_TRANSCRIPT_PREVIEW_CHARS = 500

# Column titles of the "Recipe Fields" table and the DataFrame keys that fill them.
_RECIPE_HEADERS = (
    "Recipe ID", "Testing Strategy", "Schema Processing Model", "Pre-Processing Strategy",
    "Pre-Processing Text", "Pre-Processing Model", "Prompting Strategy",
)
_RECIPE_KEYS = (
    "Recipe_ID", "Testing_Strategy_Text", "Schema_Processing_Model", "Pre_Processing_Strategy",
    "Pre_Processing_Text", "Pre_Processing_Model", "Prompting_Strategy",
)

# Column titles of the "Prompts" table and the DataFrame keys that fill them.
_PROMPT_HEADERS = ("Plantings and Fields Prompt", "Interactions Prompt", "Treatments Prompt")
_PROMPT_KEYS = ("Plantings_and_Fields_Prompt", "Interactions_Prompt", "Treatments_Prompt")


def _is_missing(value):
    """Return True for None and for float NaN (NaN is the only float unequal to itself)."""
    return value is None or (isinstance(value, float) and value != value)


def _as_text(value):
    """Return ``value`` as a string, mapping missing values to the empty string."""
    return "" if _is_missing(value) else str(value)


def _md_cell(value):
    """Return ``value`` as text that is safe inside a single Markdown table cell."""
    text = str(value)
    # A raw pipe ends the cell and a raw line break ends the table row.
    text = text.replace("|", "\\|")
    return text.replace("\r\n", "<br>").replace("\n", "<br>").replace("\r", "<br>")


def _table_header(headers):
    """Return the title line and the dashed separator line for ``headers``."""
    titles = "| " + " | ".join(headers) + " |\n"
    dashes = "| " + " | ".join("-" * len(title) for title in headers) + " |\n"
    return titles + dashes


def _table_row(values):
    """Return one Markdown table line with every value escaped for cell use."""
    return "| " + " | ".join(_md_cell(value) for value in values) + " |\n"


def _normalized_yaml(value):
    """Return ``value`` re-serialised as block-style YAML, degrading gracefully.

    Missing values become an empty string, other non-text values are shown via
    ``str()``, and text that is not valid YAML is shown unchanged.
    """
    if not isinstance(value, (str, bytes)):
        return _as_text(value)
    # Imported locally so this function does not depend on a module-level
    # ``import yaml`` being present.
    import yaml
    try:
        return yaml.safe_dump(yaml.safe_load(value), default_flow_style=False)
    except yaml.YAMLError:
        # One unparsable document should not abort the whole report.
        return str(value)


def generate_markdown_output(df):
    """Render the evaluation results in ``df`` as one Markdown report string.

    Sections are emitted in this order: input transcripts (truncated to 500
    characters), recipe fields, prompts, gold vs. machine key-values, value
    differences, and gold vs. machine YAML.

    Args:
        df: pandas DataFrame with one row per recipe. The columns read are
            ``Recipe_ID``, ``Input_Transcript``, the recipe and prompt columns
            listed in ``_RECIPE_KEYS`` / ``_PROMPT_KEYS``,
            ``Gold_Standard_Key_Values``, ``Machine_Generated_Key_Values``,
            ``Differences``, ``Gold_Standard_YAML`` and
            ``Machine_Generated_YAML``.

    Returns:
        The Markdown report as a single string.
    """
    # Materialise the rows once; every section below iterates over them.
    rows = [row for _, row in df.iterrows()]
    out = []

    # 1. Input Transcript
    out.append("## Input Transcript\n")
    for row in rows:
        transcript = _as_text(row['Input_Transcript'])
        if len(transcript) > _TRANSCRIPT_PREVIEW_CHARS:
            transcript = transcript[:_TRANSCRIPT_PREVIEW_CHARS] + "..."
        out.append(f"**Recipe ID {row['Recipe_ID']}**:\n```\n{transcript}\n```\n\n")

    # 2. Recipe Fields
    out.append("## Recipe Fields\n")
    out.append(_table_header(_RECIPE_HEADERS))
    out.extend(_table_row(row[key] for key in _RECIPE_KEYS) for row in rows)
    out.append("\n")

    # 3. Prompts
    out.append("## Prompts\n")
    out.append(_table_header(_PROMPT_HEADERS))
    out.extend(_table_row(row[key] for key in _PROMPT_KEYS) for row in rows)
    out.append("\n")

    # 4. Side-by-Side Comparisons
    out.append("## Gold Standard vs Machine Generated Key-Values\n")
    out.append("| Key | Gold Standard | Machine Generated |\n")
    out.append("|-----|---------------|-------------------|\n")
    for row in rows:
        out.append(_table_row((
            row['Recipe_ID'],
            row['Gold_Standard_Key_Values'],
            row['Machine_Generated_Key_Values'],
        )))
    out.append("\n")

    # 5. Differences
    out.append("## Differences\n")
    out.append("| Key | Difference |\n")
    out.append("|-----|------------|\n")
    for row in rows:
        differences = row['Differences']
        if not isinstance(differences, list):
            out.append("| No differences found | |\n")
            continue
        for diff in differences:
            if not (isinstance(diff, dict) and 'values_changed' in diff):
                continue
            for path, change in diff['values_changed'].items():
                if 'old_value' in change and 'new_value' in change:
                    old_text = _md_cell(change['old_value'])
                    new_text = _md_cell(change['new_value'])
                    out.append(f"| {_md_cell(path)} | {old_text} → {new_text} |\n")

    # 6. YAML Comparisons
    out.append("## Gold Standard vs Machine Generated YAML\n")
    for row in rows:
        gold_yaml = _normalized_yaml(row['Gold_Standard_YAML'])
        machine_yaml = _normalized_yaml(row['Machine_Generated_YAML'])
        out.append(f"**Recipe ID {row['Recipe_ID']}**:\n\n")
        out.append("**Gold Standard YAML:**\n")
        out.append(f"```yaml\n{gold_yaml}\n```\n")
        out.append("**Machine Generated YAML:**\n")
        out.append(f"```yaml\n{machine_yaml}\n```\n\n")

    # Ensure clean separation
    # NOTE(review): emitted once at the end of the report; confirm it was not
    # meant to follow every recipe inside the loop above.
    out.append("---\n\n")
    return "".join(out)
|
| 403 |
|
| 404 |
def drive_process():
|