ATS_RESUME_CHECKER / src /data_loader.py
KarthikeyanDev's picture
Initial commit with model and scripts
f08d3c9
import pandas as pd
from datasets import Dataset
def load_data(tokenizer, *, data_path="data/ats_dataset.csv", test_size=0.2, seed=None):
    """Load a CSV dataset, tokenize its ``text`` column, and split it.

    Args:
        tokenizer: Callable invoked as
            ``tokenizer(texts, padding="max_length", truncation=True)``.
            Because mapping is batched, ``texts`` is a list of strings.
            NOTE(review): presumably a Hugging Face tokenizer -- confirm
            with the caller.
        data_path: Path of the CSV file to read. Defaults to the path that
            was previously hard-coded.
        test_size: Forwarded to ``train_test_split`` as the size of the test
            split. Defaults to the previously hard-coded ``0.2``.
        seed: Optional seed forwarded to ``train_test_split``. ``None``
            (the default) keeps the original unseeded shuffle; pass an int
            for a reproducible split.

    Returns:
        The object returned by ``Dataset.train_test_split`` for the
        tokenized dataset.

    Raises:
        KeyError: If the CSV has no ``text`` column.
    """
    df = pd.read_csv(data_path)

    # Fail early with a clear message instead of deep inside ``map``.
    if "text" not in df.columns:
        raise KeyError(f"column 'text' not found in {data_path!r}")

    dataset = Dataset.from_pandas(df)

    def preprocess(batch):
        # ``batch["text"]`` is a list of strings because of ``batched=True``.
        return tokenizer(batch["text"], padding="max_length", truncation=True)

    # Batched mapping lets the tokenizer process many rows per call rather
    # than one call per row; every row is still padded to max length.
    tokenized_dataset = dataset.map(preprocess, batched=True)
    return tokenized_dataset.train_test_split(test_size=test_size, seed=seed)