Spaces:

holistic-ai
/

explainbility_benchmark

Sleeping

Zekun Wu

update

8cb47d9 almost 2 years ago

2.26 kB

	from datasets import load_dataset
	import pandas as pd


	def get_data(sample_size):
	    """Build a table of NLI prompts from a random sample of the e-SNLI train split.

	    The "esnli" dataset is loaded with ``load_dataset``, its ``train`` split is
	    converted to pandas, rows missing ``hypothesis`` or ``explanation_1`` are
	    dropped, and ``sample_size`` rows are drawn with a fixed seed
	    (``random_state=42``) so repeated calls pick the same rows.

	    Args:
	        sample_size: Number of rows to draw; passed to ``DataFrame.sample`` as ``n``.

	    Returns:
	        A ``pandas.DataFrame`` with one row per sampled example and the columns
	        ``question`` (the filled-in prompt), ``answer`` (the label name) and
	        ``reference_explanation`` (the ``explanation_1`` text).
	    """
	    # Integer class ids as mapped by this module to human-readable answers.
	    label_names = {0: 'Entailment', 1: 'Neutral', 2: 'Contradiction'}

	    template = """You are an advanced AI trained to understand and explain natural language relationships. I will give you a pair of sentences: a premise and a hypothesis. Your task is to determine the relationship between them and provide a detailed explanation of your reasoning process. The possible relationships are "Entailment," "Contradiction," or "Neutral."

	Instructions:

	Read the given premise and hypothesis carefully.

	Identify the relationship between them based on the following definitions:

	Entailment: The hypothesis logically follows from the premise.
	Contradiction: The hypothesis directly contradicts the premise.
	Neutral: The hypothesis neither logically follows from nor contradicts the premise.

	Provide the relationship (Entailment, Contradiction, or Neutral).

	Explain in about ten words your reasoning to justify your conclusion.

	Example:

	Premise: "A man is playing a guitar."
	Hypothesis: "A man is making music."
	Relationship: Entailment
	Explanation: Playing guitar inherently involves creating music, fulfilling the hypothesis.

	Now, try it with the following pair:

	Premise: "{premise}"
	Hypothesis: "{hypothesis}"
	Relationship:
	"""

	    # Load the train split and keep only rows usable as prompt + reference.
	    usable_rows = (
	        load_dataset("esnli")['train']
	        .to_pandas()
	        .dropna(subset=['hypothesis', 'explanation_1'])
	    )

	    # Fixed seed keeps the drawn sample reproducible across runs.
	    sampled = usable_rows.sample(n=sample_size, random_state=42)

	    # One record per sampled row; a list of dicts keeps the resulting frame's
	    # fresh 0..n-1 index instead of carrying over the sampled row labels.
	    records = [
	        {
	            'question': template.format(premise=premise, hypothesis=hypothesis),
	            'answer': label_names[label],
	            'reference_explanation': explanation,
	        }
	        for premise, hypothesis, label, explanation in zip(
	            sampled['premise'],
	            sampled['hypothesis'],
	            sampled['label'],
	            sampled['explanation_1'],
	        )
	    ]

	    return pd.DataFrame(records)

	if __name__ == "__main__":
	    # Manual smoke run: build a five-row prompt table and print it.
	    demo_frame = get_data(sample_size=5)
	    print(demo_frame)