# synthetic_data.py
import random

import pandas as pd

# Task types a sample can be drawn from.
_TASKS = ("qa", "summarization", "reasoning")

# Column order of the returned DataFrame; passed explicitly so that an empty
# result (zero agents or zero samples) still carries the expected schema.
_COLUMNS = ["prompt", "response", "task", "agent", "reference"]


def _make_sample(rng, task, index):
    """Return ``(prompt, reference, response)`` for one sample of *task*.

    *rng* must provide ``random()``; it is consulted only for the "qa" and
    "reasoning" tasks, which sometimes substitute a wrong answer.
    """
    if task == "qa":
        # NOTE: the prompt is a placeholder keyed on the sample index; it does
        # not name the country that the reference answer talks about.
        prompt = f"What is the capital of country {index}?"
        if index % 2 == 0:
            reference = "Paris is the capital of France."
        else:
            reference = "Rome is the capital of Italy."
        # The reference is echoed when the draw exceeds 0.2 (about 80% of
        # the time); otherwise a wrong answer is produced.
        response = reference if rng.random() > 0.2 else "I think it's Berlin."
    elif task == "summarization":
        prompt = (
            "Summarize: 'The sun is a star that provides light and heat "
            "to the Earth.'"
        )
        reference = "The sun provides light and heat essential for life on Earth."
        # Always a close paraphrase of the reference, never an exact copy.
        response = "The sun provides light and heat for Earth."
    else:
        prompt = "Explain why the sky is blue."
        reference = (
            "Rayleigh scattering causes shorter blue wavelengths to scatter "
            "more, making the sky appear blue."
        )
        # The reference is echoed when the draw exceeds 0.3 (about 70% of
        # the time); otherwise a wrong answer is produced.
        response = (
            reference if rng.random() > 0.3 else "It is because of reflection."
        )
    return prompt, reference, response


def generate_synthetic_dataset(num_agents=4, num_samples=20, *, seed=None):
    """Build a DataFrame of synthetic prompt/response samples per agent.

    Each agent ``Agent_0`` .. ``Agent_{num_agents - 1}`` receives
    ``num_samples`` rows. Every row has a randomly chosen task ("qa",
    "summarization" or "reasoning"), a prompt, a reference answer and a
    response that may or may not match the reference.

    Args:
        num_agents: Number of agents to generate rows for. Zero or a
            negative value yields an empty DataFrame.
        num_samples: Number of rows generated per agent. Zero or a negative
            value yields an empty DataFrame.
        seed: Optional seed for a private random generator, giving
            reproducible output without touching the global ``random``
            state. When ``None`` (the default) the module-level ``random``
            generator is used, so ``random.seed(...)`` called beforehand
            still controls the output.

    Returns:
        A ``pandas.DataFrame`` with the columns ``prompt``, ``response``,
        ``task``, ``agent`` and ``reference`` (in that order) and
        ``num_agents * num_samples`` rows.
    """
    # The ``random`` module exposes the same ``choice``/``random`` API as a
    # ``random.Random`` instance, so either can serve as the generator.
    rng = random if seed is None else random.Random(seed)

    rows = []
    for agent_idx in range(num_agents):
        agent_name = f"Agent_{agent_idx}"
        for sample_idx in range(num_samples):
            # Task is drawn first, then the sample, to keep the sequence of
            # random draws stable.
            task = rng.choice(_TASKS)
            prompt, reference, response = _make_sample(rng, task, sample_idx)
            rows.append(
                {
                    "prompt": prompt,
                    "response": response,
                    "task": task,
                    "agent": agent_name,
                    "reference": reference,
                }
            )

    return pd.DataFrame(rows, columns=_COLUMNS)