rosemariafontana committed on
Commit
5fadba9
·
verified ·
1 Parent(s): c03a11f

Update script_for_automation.py

Browse files
Files changed (1) hide show
  1. script_for_automation.py +39 -18
script_for_automation.py CHANGED
@@ -340,73 +340,94 @@ def sanitize_json_for_yaml(data):
340
  else:
341
  return data # Keep other types as-is
342
 
 
 
 
 
 
 
 
 
 
 
 
 
def generate_markdown_output(df):
    """Build a single Markdown report from the rows of *df*.

    The report has six parts, in this order: input transcripts, a recipe
    field table, a prompt table, a gold-vs-machine key-value table, a
    differences table and a YAML comparison for each row.

    The columns read from each row are ``Recipe_ID``, ``Input_Transcript``,
    ``Testing_Strategy_Text``, ``Schema_Processing_Model``,
    ``Pre_Processing_Strategy``, ``Pre_Processing_Text``,
    ``Pre_Processing_Model``, ``Prompting_Strategy``,
    ``Plantings_and_Fields_Prompt``, ``Interactions_Prompt``,
    ``Treatments_Prompt``, ``Gold_Standard_Key_Values``,
    ``Machine_Generated_Key_Values``, ``Differences``,
    ``Gold_Standard_YAML`` and ``Machine_Generated_YAML``.

    Returns the report as one string.
    """

    def header_lines(titles):
        # Title row followed by a dash row whose dashes match each title's width.
        title_row = "| " + " | ".join(titles) + " |\n"
        dash_row = "| " + " | ".join(["-" * len(title) for title in titles]) + " |\n"
        return title_row + dash_row

    def value_row(record, keys):
        # One table row holding the listed columns of a record, in order.
        return "| " + " | ".join([f"{record[key]}" for key in keys]) + " |\n"

    pieces = []

    # 1. Input Transcript (shortened to 500 characters plus an ellipsis)
    pieces.append("## Input Transcript\n")
    for _, record in df.iterrows():
        transcript = record['Input_Transcript']
        if len(transcript) > 500:
            transcript = transcript[:500] + "..."
        pieces.append(f"**Recipe ID {record['Recipe_ID']}**:\n```\n{transcript}\n```\n\n")

    # 2. Recipe Fields
    pieces.append("## Recipe Fields\n")
    pieces.append(header_lines([
        "Recipe ID", "Testing Strategy", "Schema Processing Model", "Pre-Processing Strategy",
        "Pre-Processing Text", "Pre-Processing Model", "Prompting Strategy"
    ]))
    recipe_keys = (
        'Recipe_ID', 'Testing_Strategy_Text', 'Schema_Processing_Model',
        'Pre_Processing_Strategy', 'Pre_Processing_Text', 'Pre_Processing_Model',
        'Prompting_Strategy',
    )
    for _, record in df.iterrows():
        pieces.append(value_row(record, recipe_keys))
    pieces.append("\n")

    # 3. Prompts
    pieces.append("## Prompts\n")
    pieces.append(header_lines(["Plantings and Fields Prompt", "Interactions Prompt", "Treatments Prompt"]))
    prompt_keys = ('Plantings_and_Fields_Prompt', 'Interactions_Prompt', 'Treatments_Prompt')
    for _, record in df.iterrows():
        pieces.append(value_row(record, prompt_keys))
    pieces.append("\n")

    # 4. Side-by-Side Comparisons
    pieces.append("## Gold Standard vs Machine Generated Key-Values\n")
    pieces.append("| Key | Gold Standard | Machine Generated |\n")
    pieces.append("|-----|---------------|-------------------|\n")
    comparison_keys = ('Recipe_ID', 'Gold_Standard_Key_Values', 'Machine_Generated_Key_Values')
    for _, record in df.iterrows():
        pieces.append(value_row(record, comparison_keys))
    pieces.append("\n")

    # 5. Differences
    pieces.append("## Differences\n")
    pieces.append("| Key | Difference |\n")
    pieces.append("|-----|------------|\n")
    for _, record in df.iterrows():
        found = record['Differences']
        if not isinstance(found, list):
            pieces.append("| No differences found | |\n")
            continue
        for entry in found:
            if not (isinstance(entry, dict) and 'values_changed' in entry):
                continue
            for location, delta in entry['values_changed'].items():
                if 'old_value' in delta and 'new_value' in delta:
                    pieces.append(f"| {location} | {delta['old_value']} {delta['new_value']} |\n")

    # 6. YAML Comparisons
    pieces.append("## Gold Standard vs Machine Generated YAML\n")
    for _, record in df.iterrows():
        # Round-trip through the YAML parser so both documents share one layout.
        expected_doc = yaml.safe_dump(yaml.safe_load(record['Gold_Standard_YAML']), default_flow_style=False)
        produced_doc = yaml.safe_dump(yaml.safe_load(record['Machine_Generated_YAML']), default_flow_style=False)

        pieces.append(f"**Recipe ID {record['Recipe_ID']}**:\n\n")
        pieces.append("**Gold Standard YAML:**\n")
        pieces.append(f"```yaml\n{expected_doc}\n```\n")
        pieces.append("**Machine Generated YAML:**\n")
        pieces.append(f"```yaml\n{produced_doc}\n```\n\n")

    # Ensure clean separation
    # NOTE(review): the diff view lost indentation; this rule is emitted once,
    # after all recipes - confirm it was not meant to follow every recipe.
    pieces.append("---\n\n")
    return "".join(pieces)
412
 
 
340
  else:
341
  return data # Keep other types as-is
342
 
def format_json(json_data, truncate_length=500):
    """Return *json_data* as pretty-printed JSON text, shortened for display.

    Args:
        json_data: Usually a JSON document as ``str``. ``bytes``/``bytearray``
            are parsed the same way. Any other value (for example a ``dict``
            or ``list`` that was already parsed, ``None`` or a float) is
            serialised directly instead of raising ``TypeError``.
        truncate_length: Maximum number of characters kept. Longer text is
            cut at this length and ``"..."`` is appended.

    Returns:
        The text indented by two spaces when the input is valid JSON (or
        JSON-serialisable), otherwise the input rendered as plain text. In
        both cases the result is truncated as described above.
    """
    if isinstance(json_data, (str, bytes, bytearray)):
        try:
            text = json.dumps(json.loads(json_data), indent=2)
        except ValueError:
            # json.JSONDecodeError and UnicodeDecodeError both derive from
            # ValueError: the input is not JSON, so show it unchanged.
            if isinstance(json_data, str):
                text = json_data
            else:
                text = bytes(json_data).decode("utf-8", errors="replace")
    else:
        try:
            # Already-parsed data: json.loads would reject it with TypeError.
            text = json.dumps(json_data, indent=2)
        except (TypeError, ValueError):
            # Not JSON-serialisable (or circular); fall back to its str form.
            text = str(json_data)

    if len(text) > truncate_length:
        return text[:truncate_length] + "..."
    return text
354
+
# (display header, DataFrame column) pairs for the "Recipe Fields" table.
# NOTE(review): the underscore keys follow the other lookups in this function
# ('Recipe_ID', ...) and the previous revision of these tables - confirm
# against the code that builds the DataFrame.
_RECIPE_FIELDS = (
    ("Recipe ID", "Recipe_ID"),
    ("Testing Strategy", "Testing_Strategy_Text"),
    ("Schema Processing Model", "Schema_Processing_Model"),
    ("Pre-Processing Strategy", "Pre_Processing_Strategy"),
    ("Pre-Processing Text", "Pre_Processing_Text"),
    ("Pre-Processing Model", "Pre_Processing_Model"),
    ("Prompting Strategy", "Prompting_Strategy"),
)

# (display header, DataFrame column) pairs for the "Prompts" table.
_PROMPT_FIELDS = (
    ("Plantings and Fields Prompt", "Plantings_and_Fields_Prompt"),
    ("Interactions Prompt", "Interactions_Prompt"),
    ("Treatments Prompt", "Treatments_Prompt"),
)


def _md_cell(value):
    """Render *value* as text that fits inside one Markdown table cell."""
    # A raw pipe would end the cell and a raw newline would end the row.
    escaped = str(value).replace("|", "\\|")
    return "<br>".join(escaped.splitlines())


def _md_row(cells):
    """Return one Markdown table row (newline-terminated) for *cells*."""
    return "| " + " | ".join(_md_cell(cell) for cell in cells) + " |\n"


def _md_table_header(headers):
    """Return the title row plus a dash row sized to each title."""
    return (
        "| " + " | ".join(headers) + " |\n"
        + "| " + " | ".join("-" * len(header) for header in headers) + " |\n"
    )


def _field(row, column, header):
    """Read ``row[column]``, falling back to the display *header* as the key."""
    return row[column] if column in row else row[header]


def _clip(text, limit=500):
    """Return *text* as ``str``, cut to *limit* characters plus ``"..."``."""
    text = str(text)
    return text[:limit] + "..." if len(text) > limit else text


def generate_markdown_output(df):
    """Build a single Markdown report from the rows of *df*.

    The report has six sections, in this order: input transcripts (clipped
    to 500 characters), a recipe field table, a prompt table, a
    gold-vs-machine key-value table (values formatted by ``format_json``),
    a differences table and a YAML comparison for each row.

    Table cells are escaped so that pipes and line breaks in the data cannot
    break the table layout. Recipe and prompt columns are looked up by their
    underscore names, falling back to the display header as the key.

    Args:
        df: DataFrame with one recipe per row. The columns read are
            ``Recipe_ID``, ``Input_Transcript``, the recipe and prompt
            columns listed in ``_RECIPE_FIELDS`` / ``_PROMPT_FIELDS``,
            ``Gold_Standard_Key_Values``, ``Machine_Generated_Key_Values``,
            ``Differences``, ``Gold_Standard_YAML`` and
            ``Machine_Generated_YAML``.

    Returns:
        The whole report as one string.

    Raises:
        KeyError: If a required column is missing from *df*.
    """
    rows = [row for _, row in df.iterrows()]
    parts = []

    # 1. Input Transcript
    parts.append("\n## Input Transcript\n")
    for row in rows:
        parts.append(
            f"**Recipe ID {row['Recipe_ID']}**:\n```\n{_clip(row['Input_Transcript'])}\n```\n\n"
        )

    # 2. Recipe Fields
    parts.append("\n## Recipe Fields\n")
    parts.append(_md_table_header([header for header, _ in _RECIPE_FIELDS]))
    for row in rows:
        parts.append(_md_row(_field(row, column, header) for header, column in _RECIPE_FIELDS))
    parts.append("\n")

    # 3. Prompts
    parts.append("\n## Prompts\n")
    parts.append(_md_table_header([header for header, _ in _PROMPT_FIELDS]))
    for row in rows:
        parts.append(_md_row(_field(row, column, header) for header, column in _PROMPT_FIELDS))
    parts.append("\n")

    # 4. Gold Standard vs Machine Generated Key-Values
    parts.append("\n## Gold Standard vs Machine Generated Key-Values\n")
    parts.append("| Key | Gold Standard | Machine Generated |\n")
    parts.append("|-----|---------------|-------------------|\n")
    for row in rows:
        parts.append(_md_row([
            row['Recipe_ID'],
            format_json(row['Gold_Standard_Key_Values']),
            format_json(row['Machine_Generated_Key_Values']),
        ]))
    parts.append("\n")

    # 5. Differences
    parts.append("\n## Differences\n")
    parts.append("| Key | Difference |\n")
    parts.append("|-----|------------|\n")
    for row in rows:
        differences = row['Differences']
        if not isinstance(differences, list) or not differences:
            parts.append(_md_row([row['Recipe_ID'], "No differences found"]))
            continue
        for diff in differences:
            if not (isinstance(diff, dict) and 'values_changed' in diff):
                # Other kinds of entries are not rendered in this table.
                continue
            for path, change in diff['values_changed'].items():
                if 'old_value' in change and 'new_value' in change:
                    parts.append(_md_row([path, f"{change['old_value']} → {change['new_value']}"]))
    parts.append("\n")

    # 6. YAML Comparisons
    # Appended like every other section; assigning here would discard
    # sections 1-5 from the returned report.
    parts.append("\n## Gold Standard vs Machine Generated YAML\n")
    for row in rows:
        # Round-trip through the YAML parser so both documents share one layout.
        gold_yaml = yaml.safe_dump(yaml.safe_load(row['Gold_Standard_YAML']), default_flow_style=False)
        machine_yaml = yaml.safe_dump(yaml.safe_load(row['Machine_Generated_YAML']), default_flow_style=False)

        parts.append(f"**Recipe ID {row['Recipe_ID']}**:\n\n")
        parts.append("**Gold Standard YAML:**\n")
        parts.append(f"```yaml\n{gold_yaml}\n```\n")
        parts.append("**Machine Generated YAML:**\n")
        parts.append(f"```yaml\n{machine_yaml}\n```\n\n")

    # NOTE(review): the diff view lost indentation; this rule is emitted once,
    # after all recipes - confirm it was not meant to follow every recipe.
    parts.append("---\n\n")
    return "".join(parts)
433