thankrandomness committed on
Commit
bae9286
·
1 Parent(s): 70e635a

train test split

Browse files
Files changed (1) hide show
  1. app.py +10 -3
app.py CHANGED
@@ -1,11 +1,18 @@
1
  import pandas as pd
2
- from datasets import load_dataset
3
 
4
  # Load the dataset from Huggingface
5
- dataset = load_dataset('thankrandomness/spam-detection-sample')
 
 
 
 
 
 
 
6
 
7
  # Convert to a pandas DataFrame
8
- df = pd.DataFrame(dataset)
9
 
10
  # Display the first few rows of the dataframe to verify
11
  print(df.head())
 
1
  import pandas as pd
2
+ from datasets import load_dataset, DatasetDict
3
 
4
  # Load the dataset from Huggingface
5
+ dataset = load_dataset("thankrandomness/spam-detection-sample")
6
+
7
+ # Split the dataset into train and validation sets
8
+ split_dataset = dataset['train'].train_test_split(test_size=0.3, seed=42)
9
+ dataset = DatasetDict({
10
+ 'train': split_dataset['train'],
11
+ 'validation': split_dataset['test']
12
+ })
13
 
14
  # Convert to a pandas DataFrame
15
+ df = pd.DataFrame(dataset['train'])
16
 
17
  # Display the first few rows of the dataframe to verify
18
  print(df.head())