Update script_for_automation.py
Browse files- script_for_automation.py +39 -18
script_for_automation.py
CHANGED
|
@@ -340,73 +340,94 @@ def sanitize_json_for_yaml(data):
|
|
| 340 |
else:
|
| 341 |
return data # Keep other types as-is
|
| 342 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 343 |
def generate_markdown_output(df):
|
| 344 |
-
# Start the markdown output string
|
| 345 |
markdown = ""
|
| 346 |
|
| 347 |
# 1. Input Transcript
|
| 348 |
-
markdown += "## Input Transcript\n"
|
| 349 |
for _, row in df.iterrows():
|
| 350 |
truncated_input = row['Input_Transcript'][:500] + "..." if len(row['Input_Transcript']) > 500 else row['Input_Transcript']
|
| 351 |
markdown += f"**Recipe ID {row['Recipe_ID']}**:\n```\n{truncated_input}\n```\n\n"
|
| 352 |
|
| 353 |
# 2. Recipe Fields
|
| 354 |
-
markdown += "## Recipe Fields\n"
|
| 355 |
recipe_columns = [
|
| 356 |
"Recipe ID", "Testing Strategy", "Schema Processing Model", "Pre-Processing Strategy",
|
| 357 |
"Pre-Processing Text", "Pre-Processing Model", "Prompting Strategy"
|
| 358 |
]
|
|
|
|
| 359 |
recipe_table = "| " + " | ".join(recipe_columns) + " |\n"
|
| 360 |
recipe_table += "| " + " | ".join(["-" * len(col) for col in recipe_columns]) + " |\n"
|
| 361 |
for _, row in df.iterrows():
|
| 362 |
-
recipe_table +=
|
| 363 |
markdown += recipe_table + "\n"
|
| 364 |
|
| 365 |
# 3. Prompts
|
| 366 |
-
markdown += "## Prompts\n"
|
| 367 |
prompt_columns = ["Plantings and Fields Prompt", "Interactions Prompt", "Treatments Prompt"]
|
| 368 |
prompt_table = "| " + " | ".join(prompt_columns) + " |\n"
|
| 369 |
prompt_table += "| " + " | ".join(["-" * len(col) for col in prompt_columns]) + " |\n"
|
| 370 |
for _, row in df.iterrows():
|
| 371 |
-
prompt_table +=
|
| 372 |
markdown += prompt_table + "\n"
|
| 373 |
|
| 374 |
-
# 4.
|
| 375 |
-
markdown += "## Gold Standard vs Machine Generated Key-Values\n"
|
| 376 |
markdown += "| Key | Gold Standard | Machine Generated |\n"
|
| 377 |
markdown += "|-----|---------------|-------------------|\n"
|
| 378 |
for _, row in df.iterrows():
|
| 379 |
-
|
|
|
|
|
|
|
|
|
|
| 380 |
markdown += "\n"
|
| 381 |
|
| 382 |
# 5. Differences
|
| 383 |
-
markdown += "## Differences\n"
|
| 384 |
markdown += "| Key | Difference |\n"
|
| 385 |
markdown += "|-----|------------|\n"
|
| 386 |
for _, row in df.iterrows():
|
| 387 |
differences = row['Differences']
|
| 388 |
if isinstance(differences, list):
|
| 389 |
-
|
| 390 |
-
|
| 391 |
-
|
| 392 |
-
|
| 393 |
-
|
|
|
|
|
|
|
|
|
|
| 394 |
else:
|
| 395 |
-
markdown += "| No differences found |
|
|
|
|
| 396 |
|
| 397 |
# 6. YAML Comparisons
|
| 398 |
-
markdown
|
|
|
|
| 399 |
for _, row in df.iterrows():
|
|
|
|
| 400 |
gold_yaml = yaml.safe_dump(yaml.safe_load(row['Gold_Standard_YAML']), default_flow_style=False)
|
| 401 |
machine_yaml = yaml.safe_dump(yaml.safe_load(row['Machine_Generated_YAML']), default_flow_style=False)
|
| 402 |
|
|
|
|
| 403 |
markdown += f"**Recipe ID {row['Recipe_ID']}**:\n\n"
|
| 404 |
markdown += "**Gold Standard YAML:**\n"
|
| 405 |
markdown += f"```yaml\n{gold_yaml}\n```\n"
|
| 406 |
markdown += "**Machine Generated YAML:**\n"
|
| 407 |
markdown += f"```yaml\n{machine_yaml}\n```\n\n"
|
| 408 |
|
| 409 |
-
# Ensure clean separation
|
| 410 |
markdown += "---\n\n"
|
| 411 |
return markdown
|
| 412 |
|
|
|
|
| 340 |
else:
|
| 341 |
return data # Keep other types as-is
|
| 342 |
|
| 343 |
+
def format_json(json_data, truncate_length=500):
    """Pretty-print JSON-like data and cap its length for display.

    Args:
        json_data: Usually a JSON document as ``str`` (``bytes`` and
            ``bytearray`` are accepted as well). Already-parsed ``dict`` /
            ``list`` values are serialized directly, and any other value
            (e.g. ``None`` or a NaN float coming from an empty table cell)
            is rendered with ``str()`` instead of raising ``TypeError``.
        truncate_length: Maximum number of characters kept before ``"..."``
            is appended.

    Returns:
        The 2-space-indented JSON text when the input is (or parses as)
        JSON, otherwise the input's plain text; in both cases cut to
        ``truncate_length`` characters followed by ``"..."`` when longer.
    """
    if isinstance(json_data, (dict, list)):
        # Already parsed: no need to round-trip through json.loads.
        try:
            text = json.dumps(json_data, indent=2)
        except (TypeError, ValueError):
            # Holds values json cannot serialize; fall back to the repr-like text.
            text = str(json_data)
    elif isinstance(json_data, (str, bytes, bytearray)):
        try:
            text = json.dumps(json.loads(json_data), indent=2)
        except ValueError:
            # json.JSONDecodeError (and UnicodeDecodeError for bad bytes) are
            # ValueError subclasses: not valid JSON, so show the text as-is.
            if isinstance(json_data, str):
                text = json_data
            else:
                text = bytes(json_data).decode("utf-8", errors="replace")
    else:
        # json.loads would raise TypeError for these; display them verbatim.
        text = str(json_data)

    if len(text) > truncate_length:
        return text[:truncate_length] + "..."
    return text
|
| 354 |
+
|
| 355 |
def generate_markdown_output(df):
    """Render the evaluation results in ``df`` as a Markdown report.

    The report is built from six sections, in order: input transcripts,
    recipe fields, prompts, gold-vs-machine key-values, differences, and
    gold-vs-machine YAML, followed by a horizontal rule.

    Args:
        df: Object exposing ``iterrows()`` (e.g. a pandas DataFrame) whose
            rows provide the columns read below: ``Input_Transcript``,
            ``Recipe_ID``, the recipe/prompt column names listed in
            sections 2 and 3, ``Gold_Standard_Key_Values``,
            ``Machine_Generated_Key_Values``, ``Differences``,
            ``Gold_Standard_YAML`` and ``Machine_Generated_YAML``.

    Returns:
        The complete Markdown document as a single string.
    """
    # Collect fragments and join once at the end instead of repeated `+=`.
    parts = []

    # 1. Input Transcript
    parts.append("\n## Input Transcript\n")
    for _, row in df.iterrows():
        transcript = row['Input_Transcript']
        truncated_input = transcript[:500] + "..." if len(transcript) > 500 else transcript
        parts.append(f"**Recipe ID {row['Recipe_ID']}**:\n```\n{truncated_input}\n```\n\n")

    # 2. Recipe Fields
    # NOTE(review): this table looks up "Recipe ID" (with a space) while the
    # other sections read 'Recipe_ID' -- confirm both columns exist in df.
    parts.append("\n## Recipe Fields\n")
    recipe_columns = [
        "Recipe ID", "Testing Strategy", "Schema Processing Model", "Pre-Processing Strategy",
        "Pre-Processing Text", "Pre-Processing Model", "Prompting Strategy"
    ]
    parts.append("| " + " | ".join(recipe_columns) + " |\n")
    parts.append("| " + " | ".join("-" * len(col) for col in recipe_columns) + " |\n")
    for _, row in df.iterrows():
        parts.append("| " + " | ".join(str(row[col]) for col in recipe_columns) + " |\n")
    parts.append("\n")

    # 3. Prompts
    parts.append("\n## Prompts\n")
    prompt_columns = ["Plantings and Fields Prompt", "Interactions Prompt", "Treatments Prompt"]
    parts.append("| " + " | ".join(prompt_columns) + " |\n")
    parts.append("| " + " | ".join("-" * len(col) for col in prompt_columns) + " |\n")
    for _, row in df.iterrows():
        parts.append("| " + " | ".join(str(row[col]) for col in prompt_columns) + " |\n")
    parts.append("\n")

    # 4. Gold Standard vs Machine Generated Key-Values
    parts.append("\n## Gold Standard vs Machine Generated Key-Values\n")
    parts.append("| Key | Gold Standard | Machine Generated |\n")
    parts.append("|-----|---------------|-------------------|\n")
    for _, row in df.iterrows():
        # format_json pretty-prints and truncates the JSON-like cell content.
        gold_standard = format_json(row['Gold_Standard_Key_Values'])
        machine_generated = format_json(row['Machine_Generated_Key_Values'])
        parts.append(f"| {row['Recipe_ID']} | {gold_standard} | {machine_generated} |\n")
    parts.append("\n")

    # 5. Differences
    parts.append("\n## Differences\n")
    parts.append("| Key | Difference |\n")
    parts.append("|-----|------------|\n")
    for _, row in df.iterrows():
        differences = row['Differences']
        if not isinstance(differences, list) or not differences:
            # Missing, non-list, or empty diff data: report the recipe as clean.
            parts.append(f"| {row['Recipe_ID']} | No differences found |\n")
            continue
        for diff in differences:
            # Only dict entries carrying a 'values_changed' mapping are rendered.
            if not (isinstance(diff, dict) and 'values_changed' in diff):
                continue
            for path, change in diff['values_changed'].items():
                if 'old_value' in change and 'new_value' in change:
                    parts.append(f"| {path} | {change['old_value']} → {change['new_value']} |\n")
    parts.append("\n")

    # 6. YAML Comparisons
    # Fix: this header used to be assigned with `=`, which threw away every
    # section built above; it is now appended like the other headers.
    parts.append("\n## Gold Standard vs Machine Generated YAML\n")
    for _, row in df.iterrows():
        # Round-trip through safe_load/safe_dump to normalize to block-style YAML.
        gold_yaml = yaml.safe_dump(yaml.safe_load(row['Gold_Standard_YAML']), default_flow_style=False)
        machine_yaml = yaml.safe_dump(yaml.safe_load(row['Machine_Generated_YAML']), default_flow_style=False)

        parts.append(f"**Recipe ID {row['Recipe_ID']}**:\n\n")
        parts.append("**Gold Standard YAML:**\n")
        parts.append(f"```yaml\n{gold_yaml}\n```\n")
        parts.append("**Machine Generated YAML:**\n")
        parts.append(f"```yaml\n{machine_yaml}\n```\n\n")

    # NOTE(review): assumed to be emitted once after the loop (closing rule),
    # not once per recipe -- confirm against the original indentation.
    parts.append("---\n\n")
    return "".join(parts)
|
| 433 |
|