rosemariafontana committed on
Commit
08a1772
·
verified ·
1 Parent(s): 5fadba9

Update script_for_automation.py

Browse files
Files changed (1) hide show
  1. script_for_automation.py +27 -24
script_for_automation.py CHANGED
@@ -352,7 +352,20 @@ def format_json(json_data, truncate_length=500):
352
  # If it's not valid JSON, return the string as it is
353
  return json_data[:truncate_length] + "..." if len(json_data) > truncate_length else json_data
354
 
 
 
 
 
 
 
 
 
 
 
 
 
355
  def generate_markdown_output(df):
 
356
  markdown = ""
357
 
358
  # 1. Input Transcript
@@ -367,11 +380,10 @@ def generate_markdown_output(df):
367
  "Recipe ID", "Testing Strategy", "Schema Processing Model", "Pre-Processing Strategy",
368
  "Pre-Processing Text", "Pre-Processing Model", "Prompting Strategy"
369
  ]
370
- # Generate table
371
  recipe_table = "| " + " | ".join(recipe_columns) + " |\n"
372
  recipe_table += "| " + " | ".join(["-" * len(col) for col in recipe_columns]) + " |\n"
373
  for _, row in df.iterrows():
374
- recipe_table += "| " + " | ".join([str(row[col]) for col in recipe_columns]) + " |\n"
375
  markdown += recipe_table + "\n"
376
 
377
  # 3. Prompts
@@ -380,54 +392,45 @@ def generate_markdown_output(df):
380
  prompt_table = "| " + " | ".join(prompt_columns) + " |\n"
381
  prompt_table += "| " + " | ".join(["-" * len(col) for col in prompt_columns]) + " |\n"
382
  for _, row in df.iterrows():
383
- prompt_table += "| " + " | ".join([str(row[col]) for col in prompt_columns]) + " |\n"
384
  markdown += prompt_table + "\n"
385
 
386
- # 4. Gold Standard vs Machine Generated Key-Values
387
- markdown += "\n## Gold Standard vs Machine Generated Key-Values\n"
388
  markdown += "| Key | Gold Standard | Machine Generated |\n"
389
  markdown += "|-----|---------------|-------------------|\n"
390
  for _, row in df.iterrows():
391
- # Truncate or format the JSON-like data
392
- gold_standard = format_json(row['Gold_Standard_Key_Values'])
393
- machine_generated = format_json(row['Machine_Generated_Key_Values'])
394
- markdown += f"| {row['Recipe_ID']} | {gold_standard} | {machine_generated} |\n"
395
  markdown += "\n"
396
 
397
  # 5. Differences
398
- markdown += "\n## Differences\n"
399
  markdown += "| Key | Difference |\n"
400
  markdown += "|-----|------------|\n"
401
  for _, row in df.iterrows():
402
  differences = row['Differences']
403
  if isinstance(differences, list):
404
- if len(differences) > 0:
405
- for diff in differences:
406
- if isinstance(diff, dict) and 'values_changed' in diff:
407
- for path, change in diff['values_changed'].items():
408
- if 'old_value' in change and 'new_value' in change:
409
- markdown += f"| {path} | {change['old_value']} → {change['new_value']} |\n"
410
- else:
411
- markdown += f"| {row['Recipe_ID']} | No differences found |\n"
412
  else:
413
- markdown += f"| {row['Recipe_ID']} | No differences found |\n"
414
- markdown += "\n"
415
 
416
  # 6. YAML Comparisons
417
- markdown = "\n## Gold Standard vs Machine Generated YAML\n"
418
-
419
  for _, row in df.iterrows():
420
- # Ensure YAML data is properly loaded and formatted
421
  gold_yaml = yaml.safe_dump(yaml.safe_load(row['Gold_Standard_YAML']), default_flow_style=False)
422
  machine_yaml = yaml.safe_dump(yaml.safe_load(row['Machine_Generated_YAML']), default_flow_style=False)
423
 
424
- # Add comparison to markdown
425
  markdown += f"**Recipe ID {row['Recipe_ID']}**:\n\n"
426
  markdown += "**Gold Standard YAML:**\n"
427
  markdown += f"```yaml\n{gold_yaml}\n```\n"
428
  markdown += "**Machine Generated YAML:**\n"
429
  markdown += f"```yaml\n{machine_yaml}\n```\n\n"
430
 
 
431
  markdown += "---\n\n"
432
  return markdown
433
 
 
352
  # If it's not valid JSON, return the string as it is
353
  return json_data[:truncate_length] + "..." if len(json_data) > truncate_length else json_data
354
 
355
+ import yaml
356
+
357
def sanitize_json_for_yaml(data):
    """Return a copy of *data* with every tuple replaced by a list.

    Dicts and lists are rebuilt recursively, so tuples are converted at
    any nesting depth, including tuples nested inside other tuples.
    Values of any other type are returned unchanged.

    Args:
        data: Arbitrary (typically JSON-like) value: dict, list, tuple,
            or scalar.

    Returns:
        A structure of the same shape in which dict values and sequence
        items have been sanitized and every tuple has become a list.
    """
    if isinstance(data, dict):
        return {key: sanitize_json_for_yaml(value) for key, value in data.items()}
    if isinstance(data, (list, tuple)):
        # Recurse into tuples too: a plain list(data) would leave nested
        # tuples (or dicts containing tuples) unconverted.
        return [sanitize_json_for_yaml(item) for item in data]
    # Keep other types as-is.
    return data
366
+
367
  def generate_markdown_output(df):
368
+ # Start the markdown output string
369
  markdown = ""
370
 
371
  # 1. Input Transcript
 
380
  "Recipe ID", "Testing Strategy", "Schema Processing Model", "Pre-Processing Strategy",
381
  "Pre-Processing Text", "Pre-Processing Model", "Prompting Strategy"
382
  ]
 
383
  recipe_table = "| " + " | ".join(recipe_columns) + " |\n"
384
  recipe_table += "| " + " | ".join(["-" * len(col) for col in recipe_columns]) + " |\n"
385
  for _, row in df.iterrows():
386
+ recipe_table += f"| {row['Recipe_ID']} | {row['Testing_Strategy_Text']} | {row['Schema_Processing_Model']} | {row['Pre_Processing_Strategy']} | {row['Pre_Processing_Text']} | {row['Pre_Processing_Model']} | {row['Prompting_Strategy']} |\n"
387
  markdown += recipe_table + "\n"
388
 
389
  # 3. Prompts
 
392
  prompt_table = "| " + " | ".join(prompt_columns) + " |\n"
393
  prompt_table += "| " + " | ".join(["-" * len(col) for col in prompt_columns]) + " |\n"
394
  for _, row in df.iterrows():
395
+ prompt_table += f"| {row['Plantings_and_Fields_Prompt']} | {row['Interactions_Prompt']} | {row['Treatments_Prompt']} |\n"
396
  markdown += prompt_table + "\n"
397
 
398
+ # 4. Side-by-Side Comparisons
399
+ markdown += "\n## Gold Standard vs Machine Generated Key-Values\n" # Add space before header for consistency
400
  markdown += "| Key | Gold Standard | Machine Generated |\n"
401
  markdown += "|-----|---------------|-------------------|\n"
402
  for _, row in df.iterrows():
403
+ markdown += f"| {row['Recipe_ID']} | {row['Gold_Standard_Key_Values']} | {row['Machine_Generated_Key_Values']} |\n"
 
 
 
404
  markdown += "\n"
405
 
406
  # 5. Differences
407
+ markdown += "\n## Differences\n" # Add space before header for consistency
408
  markdown += "| Key | Difference |\n"
409
  markdown += "|-----|------------|\n"
410
  for _, row in df.iterrows():
411
  differences = row['Differences']
412
  if isinstance(differences, list):
413
+ for diff in differences:
414
+ if isinstance(diff, dict) and 'values_changed' in diff:
415
+ for path, change in diff['values_changed'].items():
416
+ if 'old_value' in change and 'new_value' in change:
417
+ markdown += f"| {path} | {change['old_value']} {change['new_value']} |\n"
 
 
 
418
  else:
419
+ markdown += "| No differences found | |\n"
 
420
 
421
  # 6. YAML Comparisons
422
+ markdown += "\n## Gold Standard vs Machine Generated YAML\n" # Add space before header for consistency
 
423
  for _, row in df.iterrows():
 
424
  gold_yaml = yaml.safe_dump(yaml.safe_load(row['Gold_Standard_YAML']), default_flow_style=False)
425
  machine_yaml = yaml.safe_dump(yaml.safe_load(row['Machine_Generated_YAML']), default_flow_style=False)
426
 
 
427
  markdown += f"**Recipe ID {row['Recipe_ID']}**:\n\n"
428
  markdown += "**Gold Standard YAML:**\n"
429
  markdown += f"```yaml\n{gold_yaml}\n```\n"
430
  markdown += "**Machine Generated YAML:**\n"
431
  markdown += f"```yaml\n{machine_yaml}\n```\n\n"
432
 
433
+ # Ensure clean separation
434
  markdown += "---\n\n"
435
  return markdown
436