File size: 382 Bytes
f08d3c9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 |
import pandas as pd
from datasets import Dataset
def load_data(
    tokenizer,
    csv_path: str = "data/ats_dataset.csv",
    test_size: float = 0.2,
    seed: int | None = None,
):
    """Load the ATS CSV dataset, tokenize it, and split into train/test.

    Args:
        tokenizer: A Hugging Face tokenizer; called with ``padding="max_length"``
            and ``truncation=True`` on the ``"text"`` column.
        csv_path: Path to the input CSV. Must contain a ``"text"`` column.
        test_size: Fraction of rows held out for the test split.
        seed: Optional random seed for a reproducible split. ``None`` keeps
            the library's default (non-deterministic) shuffling.

    Returns:
        A ``DatasetDict`` with ``"train"`` and ``"test"`` splits of the
        tokenized dataset.
    """
    df = pd.read_csv(csv_path)
    dataset = Dataset.from_pandas(df)

    def preprocess(batch):
        # Pad every sequence to the model's max length so examples stack
        # into uniform tensors without a data collator.
        return tokenizer(batch["text"], padding="max_length", truncation=True)

    # batched=True tokenizes many rows per call instead of one at a time —
    # identical output, substantially faster with fast tokenizers.
    tokenized_dataset = dataset.map(preprocess, batched=True)
    return tokenized_dataset.train_test_split(test_size=test_size, seed=seed)
|