File size: 579 Bytes
97b93a8
bae9286
97b93a8
 
fb45d7b
bae9286
 
0b98bc0
a27d78e
 
 
 
97b93a8
 
7f1866c
97b93a8
 
a27d78e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
import pandas as pd
from datasets import load_dataset, DatasetDict

# Load the spam-detection sample CSV from the Hugging Face Hub.
# The loaded rows are read from the 'train' split below.
dataset = load_dataset(
    'thankrandomness/spam-detection-sample',
    data_files='spam-sample.csv',
)

# Hold out 30% of the rows for validation. The fixed seed keeps the
# train/validation partition reproducible across runs.
split_dataset = dataset['train'].train_test_split(test_size=0.3, seed=42)
dataset = DatasetDict({
    'train': split_dataset['train'],
    'validation': split_dataset['test'],
})

# Convert the training split to a pandas DataFrame. Use the library's own
# `Dataset.to_pandas()` rather than `pd.DataFrame(dataset)`, which would
# iterate the dataset one row (one Python dict) at a time.
df = dataset['train'].to_pandas()

# Print the first few values of the 'body' column as a quick sanity check.
print(df['body'].head())