Update script_for_automation.py
Browse files- script_for_automation.py +27 -24
script_for_automation.py
CHANGED
|
@@ -352,7 +352,20 @@ def format_json(json_data, truncate_length=500):
|
|
| 352 |
# If it's not valid JSON, return the string as it is
|
| 353 |
return json_data[:truncate_length] + "..." if len(json_data) > truncate_length else json_data
|
| 354 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 355 |
def generate_markdown_output(df):
|
|
|
|
| 356 |
markdown = ""
|
| 357 |
|
| 358 |
# 1. Input Transcript
|
|
@@ -367,11 +380,10 @@ def generate_markdown_output(df):
|
|
| 367 |
"Recipe ID", "Testing Strategy", "Schema Processing Model", "Pre-Processing Strategy",
|
| 368 |
"Pre-Processing Text", "Pre-Processing Model", "Prompting Strategy"
|
| 369 |
]
|
| 370 |
-
# Generate table
|
| 371 |
recipe_table = "| " + " | ".join(recipe_columns) + " |\n"
|
| 372 |
recipe_table += "| " + " | ".join(["-" * len(col) for col in recipe_columns]) + " |\n"
|
| 373 |
for _, row in df.iterrows():
|
| 374 |
-
recipe_table += "|
|
| 375 |
markdown += recipe_table + "\n"
|
| 376 |
|
| 377 |
# 3. Prompts
|
|
@@ -380,54 +392,45 @@ def generate_markdown_output(df):
|
|
| 380 |
prompt_table = "| " + " | ".join(prompt_columns) + " |\n"
|
| 381 |
prompt_table += "| " + " | ".join(["-" * len(col) for col in prompt_columns]) + " |\n"
|
| 382 |
for _, row in df.iterrows():
|
| 383 |
-
prompt_table += "|
|
| 384 |
markdown += prompt_table + "\n"
|
| 385 |
|
| 386 |
-
# 4.
|
| 387 |
-
markdown += "\n## Gold Standard vs Machine Generated Key-Values\n"
|
| 388 |
markdown += "| Key | Gold Standard | Machine Generated |\n"
|
| 389 |
markdown += "|-----|---------------|-------------------|\n"
|
| 390 |
for _, row in df.iterrows():
|
| 391 |
-
|
| 392 |
-
gold_standard = format_json(row['Gold_Standard_Key_Values'])
|
| 393 |
-
machine_generated = format_json(row['Machine_Generated_Key_Values'])
|
| 394 |
-
markdown += f"| {row['Recipe_ID']} | {gold_standard} | {machine_generated} |\n"
|
| 395 |
markdown += "\n"
|
| 396 |
|
| 397 |
# 5. Differences
|
| 398 |
-
markdown += "\n## Differences\n"
|
| 399 |
markdown += "| Key | Difference |\n"
|
| 400 |
markdown += "|-----|------------|\n"
|
| 401 |
for _, row in df.iterrows():
|
| 402 |
differences = row['Differences']
|
| 403 |
if isinstance(differences, list):
|
| 404 |
-
|
| 405 |
-
|
| 406 |
-
|
| 407 |
-
|
| 408 |
-
|
| 409 |
-
markdown += f"| {path} | {change['old_value']} → {change['new_value']} |\n"
|
| 410 |
-
else:
|
| 411 |
-
markdown += f"| {row['Recipe_ID']} | No differences found |\n"
|
| 412 |
else:
|
| 413 |
-
markdown +=
|
| 414 |
-
markdown += "\n"
|
| 415 |
|
| 416 |
# 6. YAML Comparisons
|
| 417 |
-
markdown = "\n## Gold Standard vs Machine Generated YAML\n"
|
| 418 |
-
|
| 419 |
for _, row in df.iterrows():
|
| 420 |
-
# Ensure YAML data is properly loaded and formatted
|
| 421 |
gold_yaml = yaml.safe_dump(yaml.safe_load(row['Gold_Standard_YAML']), default_flow_style=False)
|
| 422 |
machine_yaml = yaml.safe_dump(yaml.safe_load(row['Machine_Generated_YAML']), default_flow_style=False)
|
| 423 |
|
| 424 |
-
# Add comparison to markdown
|
| 425 |
markdown += f"**Recipe ID {row['Recipe_ID']}**:\n\n"
|
| 426 |
markdown += "**Gold Standard YAML:**\n"
|
| 427 |
markdown += f"```yaml\n{gold_yaml}\n```\n"
|
| 428 |
markdown += "**Machine Generated YAML:**\n"
|
| 429 |
markdown += f"```yaml\n{machine_yaml}\n```\n\n"
|
| 430 |
|
|
|
|
| 431 |
markdown += "---\n\n"
|
| 432 |
return markdown
|
| 433 |
|
|
|
|
| 352 |
# If it's not valid JSON, return the string as it is
|
| 353 |
return json_data[:truncate_length] + "..." if len(json_data) > truncate_length else json_data
|
| 354 |
|
| 355 |
+
import yaml
|
| 356 |
+
|
| 357 |
+
def sanitize_json_for_yaml(data):
    """Return a copy of *data* with every tuple replaced by a list.

    Dicts and lists are rebuilt with their contents sanitized recursively;
    tuples are converted to lists and their elements are sanitized as well.
    Any other value is returned unchanged.

    NOTE(review): judging by the name, this is meant to prepare parsed JSON
    for YAML dumping -- no caller is visible in this section, so confirm.

    Args:
        data: An arbitrarily nested structure of dicts, lists, tuples and
            scalar values.

    Returns:
        The same structure, built only from dicts, lists and the original
        leaf values.
    """
    if isinstance(data, dict):
        return {key: sanitize_json_for_yaml(value) for key, value in data.items()}
    # Tuples are handled exactly like lists so that nested tuples/dicts inside
    # a tuple are sanitized too (a plain list(data) would leave them as-is).
    if isinstance(data, (list, tuple)):
        return [sanitize_json_for_yaml(item) for item in data]
    return data  # Keep other types as-is
|
| 366 |
+
|
| 367 |
def generate_markdown_output(df):
|
| 368 |
+
# Start the markdown output string
|
| 369 |
markdown = ""
|
| 370 |
|
| 371 |
# 1. Input Transcript
|
|
|
|
| 380 |
"Recipe ID", "Testing Strategy", "Schema Processing Model", "Pre-Processing Strategy",
|
| 381 |
"Pre-Processing Text", "Pre-Processing Model", "Prompting Strategy"
|
| 382 |
]
|
|
|
|
| 383 |
recipe_table = "| " + " | ".join(recipe_columns) + " |\n"
|
| 384 |
recipe_table += "| " + " | ".join(["-" * len(col) for col in recipe_columns]) + " |\n"
|
| 385 |
for _, row in df.iterrows():
|
| 386 |
+
recipe_table += f"| {row['Recipe_ID']} | {row['Testing_Strategy_Text']} | {row['Schema_Processing_Model']} | {row['Pre_Processing_Strategy']} | {row['Pre_Processing_Text']} | {row['Pre_Processing_Model']} | {row['Prompting_Strategy']} |\n"
|
| 387 |
markdown += recipe_table + "\n"
|
| 388 |
|
| 389 |
# 3. Prompts
|
|
|
|
| 392 |
prompt_table = "| " + " | ".join(prompt_columns) + " |\n"
|
| 393 |
prompt_table += "| " + " | ".join(["-" * len(col) for col in prompt_columns]) + " |\n"
|
| 394 |
for _, row in df.iterrows():
|
| 395 |
+
prompt_table += f"| {row['Plantings_and_Fields_Prompt']} | {row['Interactions_Prompt']} | {row['Treatments_Prompt']} |\n"
|
| 396 |
markdown += prompt_table + "\n"
|
| 397 |
|
| 398 |
+
# 4. Side-by-Side Comparisons
|
| 399 |
+
markdown += "\n## Gold Standard vs Machine Generated Key-Values\n" # Add space before header for consistency
|
| 400 |
markdown += "| Key | Gold Standard | Machine Generated |\n"
|
| 401 |
markdown += "|-----|---------------|-------------------|\n"
|
| 402 |
for _, row in df.iterrows():
|
| 403 |
+
markdown += f"| {row['Recipe_ID']} | {row['Gold_Standard_Key_Values']} | {row['Machine_Generated_Key_Values']} |\n"
|
|
|
|
|
|
|
|
|
|
| 404 |
markdown += "\n"
|
| 405 |
|
| 406 |
# 5. Differences
|
| 407 |
+
markdown += "\n## Differences\n" # Add space before header for consistency
|
| 408 |
markdown += "| Key | Difference |\n"
|
| 409 |
markdown += "|-----|------------|\n"
|
| 410 |
for _, row in df.iterrows():
|
| 411 |
differences = row['Differences']
|
| 412 |
if isinstance(differences, list):
|
| 413 |
+
for diff in differences:
|
| 414 |
+
if isinstance(diff, dict) and 'values_changed' in diff:
|
| 415 |
+
for path, change in diff['values_changed'].items():
|
| 416 |
+
if 'old_value' in change and 'new_value' in change:
|
| 417 |
+
markdown += f"| {path} | {change['old_value']} → {change['new_value']} |\n"
|
|
|
|
|
|
|
|
|
|
| 418 |
else:
|
| 419 |
+
markdown += "| No differences found | |\n"
|
|
|
|
| 420 |
|
| 421 |
# 6. YAML Comparisons
|
| 422 |
+
markdown += "\n## Gold Standard vs Machine Generated YAML\n" # Add space before header for consistency
|
|
|
|
| 423 |
for _, row in df.iterrows():
|
|
|
|
| 424 |
gold_yaml = yaml.safe_dump(yaml.safe_load(row['Gold_Standard_YAML']), default_flow_style=False)
|
| 425 |
machine_yaml = yaml.safe_dump(yaml.safe_load(row['Machine_Generated_YAML']), default_flow_style=False)
|
| 426 |
|
|
|
|
| 427 |
markdown += f"**Recipe ID {row['Recipe_ID']}**:\n\n"
|
| 428 |
markdown += "**Gold Standard YAML:**\n"
|
| 429 |
markdown += f"```yaml\n{gold_yaml}\n```\n"
|
| 430 |
markdown += "**Machine Generated YAML:**\n"
|
| 431 |
markdown += f"```yaml\n{machine_yaml}\n```\n\n"
|
| 432 |
|
| 433 |
+
# Ensure clean separation
|
| 434 |
markdown += "---\n\n"
|
| 435 |
return markdown
|
| 436 |
|