rosemariafontana committed on
Commit
59fd861
·
verified ·
1 Parent(s): c494d38

Update script_for_automation.py

Browse files
Files changed (1) hide show
  1. script_for_automation.py +44 -45
script_for_automation.py CHANGED
@@ -329,52 +329,51 @@ def get_data_ready(recipe_dict, input_data_piece):
329
 
330
  print("DID THAT NOW")
331
  return processed_data
332
-
333
- import yaml
334
- import json
335
 
336
  def generate_markdown_output(df):
337
  # Start the markdown output string
338
  markdown = ""
339
 
340
- # Input Transcript Section
341
- markdown += "### Input Transcript\n"
342
- markdown += "Since the input transcript might be very long, it is truncated here for readability:\n\n"
343
-
344
  for _, row in df.iterrows():
345
- truncated_input = (row['Input_Transcript'][:500] + '...') if len(row['Input_Transcript']) > 500 else row['Input_Transcript']
346
- markdown += f"**Recipe ID {row['Recipe_ID']}**:\n\n{truncated_input}\n\n"
347
-
348
- # Recipe Fields Section
349
- markdown += "\n### Recipe Fields (Basic Information)\n"
350
- markdown += "| Recipe ID | Testing Strategy | Schema Processing Model | Pre-Processing Strategy | Pre-Processing Text | Pre-Processing Model | Prompting Strategy |\n"
351
- markdown += "|-----------|------------------|-------------------------|--------------------------|---------------------|----------------------|-------------------|\n"
352
-
 
 
 
353
  for _, row in df.iterrows():
354
- markdown += f"| {str(row['Recipe_ID']).ljust(10)} | {str(row['Testing_Strategy_Text']).ljust(20)} | {str(row['Schema_Processing_Model']).ljust(25)} | {str(row['Pre_Processing_Strategy']).ljust(23)} | {str(row['Pre_Processing_Text']).ljust(20)} | {str(row['Pre_Processing_Model']).ljust(20)} | {str(row['Prompting_Strategy']).ljust(25)} |\n"
355
-
356
- # Prompts Section
357
- markdown += "\n### Prompts\n"
358
- markdown += "| Plantings and Fields Prompt | Interactions Prompt | Treatments Prompt |\n"
359
- markdown += "|-----------------------------|---------------------|-------------------|\n"
360
-
 
361
  for _, row in df.iterrows():
362
- markdown += f"| {str(row['Plantings_and_Fields_Prompt']).ljust(30)} | {str(row['Interactions_Prompt']).ljust(20)} | {str(row['Treatments_Prompt']).ljust(20)} |\n"
 
363
 
364
- # Side-by-Side Comparison
365
- markdown += "\n### Gold Standard vs Machine Generated Key-Values\n"
366
  markdown += "| Key | Gold Standard | Machine Generated |\n"
367
  markdown += "|-----|---------------|-------------------|\n"
368
-
369
  for _, row in df.iterrows():
370
- markdown += f"| {str(row['Recipe_ID']).ljust(10)} | {str(row['Gold_Standard_Key_Values']).ljust(25)} | {str(row['Machine_Generated_Key_Values']).ljust(25)} |\n"
 
371
 
372
- # Differences Section
373
- markdown += "\n### Differences\n"
374
- markdown += "The following differences were found between the gold standard and the machine-generated output:\n\n"
375
  markdown += "| Key | Difference |\n"
376
  markdown += "|-----|------------|\n"
377
-
378
  for _, row in df.iterrows():
379
  differences = row['Differences']
380
  if isinstance(differences, list):
@@ -382,24 +381,24 @@ def generate_markdown_output(df):
382
  if isinstance(diff, dict) and 'values_changed' in diff:
383
  for path, change in diff['values_changed'].items():
384
  if 'old_value' in change and 'new_value' in change:
385
- markdown += f"| {str(path).ljust(20)} | {str(change['old_value']).ljust(20)} -> {str(change['new_value']).ljust(20)} |\n"
386
- else:
387
- markdown += f"| {str(path).ljust(20)} | (Missing old/new value) |\n"
388
- else:
389
- markdown += f"| (Invalid diff) | |\n"
390
  else:
391
- markdown += f"| (No differences) | |\n"
392
-
393
- # YAML Comparison Section
394
- markdown += "\n### Gold Standard vs Machine Generated YAML\n"
395
- markdown += "| Gold Standard YAML | Machine Generated YAML |\n"
396
- markdown += "|--------------------|------------------------|\n"
397
 
 
 
398
  for _, row in df.iterrows():
399
- gold_yaml = yaml.dump(yaml.safe_load(row['Gold_Standard_YAML']), default_flow_style=False)
400
- machine_yaml = yaml.dump(yaml.safe_load(row['Machine_Generated_YAML']), default_flow_style=False)
401
- markdown += f"| ```yaml\n{gold_yaml}``` | ```yaml\n{machine_yaml}``` |\n"
 
 
 
 
 
402
 
 
 
403
  return markdown
404
 
405
  def drive_process():
 
329
 
330
  print("DID THAT NOW")
331
  return processed_data
332
+
 
 
333
 
334
  def generate_markdown_output(df):
335
  # Start the markdown output string
336
  markdown = ""
337
 
338
+ # 1. Input Transcript
339
+ markdown += "## Input Transcript\n"
 
 
340
  for _, row in df.iterrows():
341
+ truncated_input = row['Input_Transcript'][:500] + "..." if len(row['Input_Transcript']) > 500 else row['Input_Transcript']
342
+ markdown += f"**Recipe ID {row['Recipe_ID']}**:\n```\n{truncated_input}\n```\n\n"
343
+
344
+ # 2. Recipe Fields
345
+ markdown += "## Recipe Fields\n"
346
+ recipe_columns = [
347
+ "Recipe ID", "Testing Strategy", "Schema Processing Model", "Pre-Processing Strategy",
348
+ "Pre-Processing Text", "Pre-Processing Model", "Prompting Strategy"
349
+ ]
350
+ recipe_table = "| " + " | ".join(recipe_columns) + " |\n"
351
+ recipe_table += "| " + " | ".join(["-" * len(col) for col in recipe_columns]) + " |\n"
352
  for _, row in df.iterrows():
353
+ recipe_table += f"| {row['Recipe_ID']} | {row['Testing_Strategy_Text']} | {row['Schema_Processing_Model']} | {row['Pre_Processing_Strategy']} | {row['Pre_Processing_Text']} | {row['Pre_Processing_Model']} | {row['Prompting_Strategy']} |\n"
354
+ markdown += recipe_table + "\n"
355
+
356
+ # 3. Prompts
357
+ markdown += "## Prompts\n"
358
+ prompt_columns = ["Plantings and Fields Prompt", "Interactions Prompt", "Treatments Prompt"]
359
+ prompt_table = "| " + " | ".join(prompt_columns) + " |\n"
360
+ prompt_table += "| " + " | ".join(["-" * len(col) for col in prompt_columns]) + " |\n"
361
  for _, row in df.iterrows():
362
+ prompt_table += f"| {row['Plantings_and_Fields_Prompt']} | {row['Interactions_Prompt']} | {row['Treatments_Prompt']} |\n"
363
+ markdown += prompt_table + "\n"
364
 
365
+ # 4. Side-by-Side Comparisons
366
+ markdown += "## Gold Standard vs Machine Generated Key-Values\n"
367
  markdown += "| Key | Gold Standard | Machine Generated |\n"
368
  markdown += "|-----|---------------|-------------------|\n"
 
369
  for _, row in df.iterrows():
370
+ markdown += f"| {row['Recipe_ID']} | {row['Gold_Standard_Key_Values']} | {row['Machine_Generated_Key_Values']} |\n"
371
+ markdown += "\n"
372
 
373
+ # 5. Differences
374
+ markdown += "## Differences\n"
 
375
  markdown += "| Key | Difference |\n"
376
  markdown += "|-----|------------|\n"
 
377
  for _, row in df.iterrows():
378
  differences = row['Differences']
379
  if isinstance(differences, list):
 
381
  if isinstance(diff, dict) and 'values_changed' in diff:
382
  for path, change in diff['values_changed'].items():
383
  if 'old_value' in change and 'new_value' in change:
384
+ markdown += f"| {path} | {change['old_value']} {change['new_value']} |\n"
 
 
 
 
385
  else:
386
+ markdown += "| No differences found | |\n"
 
 
 
 
 
387
 
388
+ # 6. YAML Comparisons
389
+ markdown += "## Gold Standard vs Machine Generated YAML\n"
390
  for _, row in df.iterrows():
391
+ gold_yaml = yaml.safe_dump(yaml.safe_load(row['Gold_Standard_YAML']), default_flow_style=False)
392
+ machine_yaml = yaml.safe_dump(yaml.safe_load(row['Machine_Generated_YAML']), default_flow_style=False)
393
+
394
+ markdown += f"**Recipe ID {row['Recipe_ID']}**:\n\n"
395
+ markdown += "**Gold Standard YAML:**\n"
396
+ markdown += f"```yaml\n{gold_yaml}\n```\n"
397
+ markdown += "**Machine Generated YAML:**\n"
398
+ markdown += f"```yaml\n{machine_yaml}\n```\n\n"
399
 
400
+ # Ensure clean separation
401
+ markdown += "---\n\n"
402
  return markdown
403
 
404
  def drive_process():