File size: 382 Bytes
f08d3c9
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
import pandas as pd
from datasets import Dataset

def load_data(tokenizer):
    """Load the ATS dataset from CSV, tokenize it, and return a train/test split.

    Args:
        tokenizer: A Hugging Face tokenizer callable; invoked on batches of the
            ``text`` column with ``padding="max_length"`` and ``truncation=True``.

    Returns:
        A ``DatasetDict`` with ``"train"`` (80%) and ``"test"`` (20%) splits of
        the tokenized dataset.
    """
    df = pd.read_csv("data/ats_dataset.csv")
    # preserve_index=False keeps the pandas index from leaking in as an
    # "__index_level_0__" column alongside the real features.
    dataset = Dataset.from_pandas(df, preserve_index=False)

    def preprocess(batch):
        # NOTE(review): assumes the CSV has a "text" column — confirm schema.
        return tokenizer(batch["text"], padding="max_length", truncation=True)

    # batched=True tokenizes many rows per call instead of one at a time —
    # same per-row result, substantially faster.
    tokenized_dataset = dataset.map(preprocess, batched=True)
    # Fixed seed so the train/test partition is reproducible across runs;
    # an unseeded split would shuffle test rows into training on re-runs.
    return tokenized_dataset.train_test_split(test_size=0.2, seed=42)