evaluation-framework / synthetic_data.py
Supastrikas-004's picture
Update synthetic_data.py
233811e verified
# synthetic_data.py
import pandas as pd
import random
def generate_synthetic_dataset(num_agents=4, num_samples=20, seed=None):
    """Build a toy dataset of agent responses for exercising the evaluation code.

    For every agent, ``num_samples`` rows are generated. Each row picks one of
    three tasks at random ("qa", "summarization", "reasoning") and pairs a
    fixed prompt/reference with a response that is sometimes deliberately
    wrong (about 20% of the time for "qa", about 30% for "reasoning"; the
    "summarization" response is always the same paraphrase).

    Args:
        num_agents: Number of agents; they are named ``Agent_0`` .. ``Agent_{n-1}``.
        num_samples: Number of rows generated per agent.
        seed: Optional seed. When given, a private ``random.Random(seed)`` is
            used so the result is reproducible and the global ``random`` state
            is left alone. When ``None`` (default), the module-level ``random``
            generator is used.

    Returns:
        A ``pandas.DataFrame`` with ``num_agents * num_samples`` rows and the
        columns ``prompt``, ``response``, ``task``, ``agent``, ``reference``.
        The columns are present even when no rows are generated.
    """
    columns = ["prompt", "response", "task", "agent", "reference"]
    tasks = ["qa", "summarization", "reasoning"]

    # Fall back to the shared module-level generator when no seed is supplied,
    # so callers that call random.seed() beforehand keep getting the same data.
    rng = random if seed is None else random.Random(seed)

    rows = []
    for agent_index in range(num_agents):
        agent_name = f"Agent_{agent_index}"
        for sample_index in range(num_samples):
            task = rng.choice(tasks)
            if task == "qa":
                prompt = f"What is the capital of country {sample_index}?"
                if sample_index % 2 == 0:
                    reference = "Paris is the capital of France."
                else:
                    reference = "Rome is the capital of Italy."
                # Roughly one in five answers is intentionally incorrect.
                response = reference if rng.random() > 0.2 else "I think it's Berlin."
            elif task == "summarization":
                prompt = "Summarize: 'The sun is a star that provides light and heat to the Earth.'"
                reference = "The sun provides light and heat essential for life on Earth."
                # Close paraphrase of the reference, never an exact match.
                response = "The sun provides light and heat for Earth."
            else:
                prompt = "Explain why the sky is blue."
                reference = "Rayleigh scattering causes shorter blue wavelengths to scatter more, making the sky appear blue."
                # Roughly three in ten explanations are intentionally incorrect.
                response = reference if rng.random() > 0.3 else "It is because of reflection."
            rows.append({
                "prompt": prompt,
                "response": response,
                "task": task,
                "agent": agent_name,
                "reference": reference,
            })

    # Explicit columns keep the schema stable when no rows were generated.
    return pd.DataFrame(rows, columns=columns)