translation-model / preprocess.py
drixo's picture
Create preprocess.py
d9cc673 verified
from datasets import load_dataset
from config import DATASET_EN_ES
def load_and_prepare():
    """Load the EN-ES dataset and map each example to source/target fields.

    The dataset named by ``DATASET_EN_ES`` is fetched with ``load_dataset``
    and every example is run through a mapper that reads
    ``example["term"]["en"]`` and ``example["term"]["es"]``.

    Returns:
        The object produced by calling ``.map`` on the loaded dataset, with
        ``"source"`` (English) and ``"target"`` (Spanish) values supplied by
        the mapper.
    """

    def _to_translation_pair(example):
        # Assumes every record has a "term" mapping holding "en" and "es"
        # entries -- TODO confirm against the dataset schema.
        term = example["term"]
        return {"source": term["en"], "target": term["es"]}

    raw_dataset = load_dataset(DATASET_EN_ES)
    return raw_dataset.map(_to_translation_pair)
if __name__ == "__main__":
    # Manual smoke check: prepare the dataset and show the first record of
    # the "train" split so the source/target fields can be inspected.
    prepared = load_and_prepare()
    first_train_example = prepared["train"][0]
    print(first_train_example)