Spaces:
Runtime error
Runtime error
Commit
·
bae9286
1
Parent(s):
70e635a
train test split
Browse files
app.py
CHANGED
|
@@ -1,11 +1,18 @@
|
|
| 1 |
import pandas as pd
|
| 2 |
-
from datasets import load_dataset
|
| 3 |
|
| 4 |
# Load the dataset from Huggingface
|
| 5 |
-
dataset = load_dataset(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
# Convert to a pandas DataFrame
|
| 8 |
-
df = pd.DataFrame(dataset)
|
| 9 |
|
| 10 |
# Display the first few rows of the dataframe to verify
|
| 11 |
print(df.head())
|
|
|
|
| 1 |
import pandas as pd
from datasets import load_dataset, DatasetDict

# Load the dataset from the Hugging Face Hub
dataset = load_dataset("thankrandomness/spam-detection-sample")

# Split the loaded 'train' split into train and validation sets:
# 30% of the rows are held out, and the fixed seed makes the shuffle
# (and therefore the partition) reproducible from run to run.
split_dataset = dataset['train'].train_test_split(test_size=0.3, seed=42)
dataset = DatasetDict({
    'train': split_dataset['train'],
    # train_test_split() names the held-out part 'test'; it is exposed
    # here under the 'validation' key instead.
    'validation': split_dataset['test'],
})

# Convert the training split to a pandas DataFrame.
# Dataset.to_pandas() is the library's own conversion; passing the Dataset
# to pd.DataFrame() would instead iterate it row by row in Python.
df = dataset['train'].to_pandas()

# Display the first few rows of the dataframe to verify
print(df.head())
|