| | from datasets import load_dataset |
| | import pandas as pd |
| |
|
| |
|
# Prompt sent for every premise/hypothesis pair. The two placeholders are
# filled with str.format; the text deliberately ends right after
# "Relationship:" so the model's completion starts with the label.
_PROMPT_TEMPLATE = """You are an advanced AI trained to understand and explain natural language relationships. I will give you a pair of sentences: a premise and a hypothesis. Your task is to determine the relationship between them and provide a detailed explanation of your reasoning process. The possible relationships are "Entailment," "Contradiction," or "Neutral."

Instructions:

Read the given premise and hypothesis carefully.

Identify the relationship between them based on the following definitions:

Entailment: The hypothesis logically follows from the premise.
Contradiction: The hypothesis directly contradicts the premise.
Neutral: The hypothesis neither logically follows from nor contradicts the premise.

Provide the relationship (Entailment, Contradiction, or Neutral).

Explain in about ten words your reasoning to justify your conclusion.

Example:

Premise: "A man is playing a guitar."
Hypothesis: "A man is making music."
Relationship: Entailment
Explanation: Playing guitar inherently involves creating music, fulfilling the hypothesis.

Now, try it with the following pair:

Premise: "{premise}"
Hypothesis: "{hypothesis}"
Relationship:
"""

# Integer class id in the ``label`` column -> answer string used in the output.
_LABEL_NAMES = {0: 'Entailment', 1: 'Neutral', 2: 'Contradiction'}

# Column order of the returned frame; fixed so an empty sample keeps its schema.
_OUTPUT_COLUMNS = ['question', 'answer', 'reference_explanation']


def get_data(sample_size, *, source_df=None, random_state=42):
    """Build a reproducible sample of NLI prompts with reference answers.

    Rows lacking a premise, hypothesis or first explanation are discarded,
    ``sample_size`` of the remaining rows are drawn without replacement, and
    each drawn row is rendered into the prompt template.

    Args:
        sample_size: Number of rows to draw.
        source_df: Optional DataFrame with ``premise``, ``hypothesis``,
            ``label`` and ``explanation_1`` columns. When omitted, the
            ``train`` split of the ``esnli`` dataset is loaded with
            ``load_dataset``.
        random_state: Seed forwarded to ``DataFrame.sample``. The default of
            42 reproduces the previously hard-coded behaviour.

    Returns:
        A DataFrame with the columns ``question`` (the rendered prompt),
        ``answer`` (``'Entailment'``, ``'Neutral'`` or ``'Contradiction'``)
        and ``reference_explanation`` (the row's ``explanation_1``). The
        columns are present even when no rows are sampled.

    Raises:
        KeyError: If a sampled row's label is not 0, 1 or 2.
        ValueError: Propagated from ``DataFrame.sample``, e.g. when
            ``sample_size`` exceeds the number of usable rows.
    """
    if source_df is None:
        # Only touch the dataset hub when the caller did not supply data.
        source_df = load_dataset("esnli")['train'].to_pandas()

    # 'premise' is included so a missing premise can never be formatted into
    # the prompt as the literal text "nan"/"None".
    usable = source_df.dropna(subset=['premise', 'hypothesis', 'explanation_1'])
    sampled = usable.sample(n=sample_size, random_state=random_state)

    # Iterate the four needed columns in lockstep instead of materialising a
    # Series per row with iterrows().
    records = [
        {
            'question': _PROMPT_TEMPLATE.format(premise=premise, hypothesis=hypothesis),
            'answer': _LABEL_NAMES[label],
            'reference_explanation': explanation,
        }
        for premise, hypothesis, label, explanation in zip(
            sampled['premise'],
            sampled['hypothesis'],
            sampled['label'],
            sampled['explanation_1'],
        )
    ]

    return pd.DataFrame(records, columns=_OUTPUT_COLUMNS)
| |
|
if __name__ == "__main__":
    # Smoke run when executed as a script: draw a handful of prompts and
    # print the resulting DataFrame.
    demo_rows = 5
    print(get_data(demo_rows))