File size: 484 Bytes
d632b3b | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 | from huggingface_hub import login
from datasets import load_dataset
def prepare(example, *, input_key: str = "prompt", target_key: str = "completion") -> dict:
    """Map one raw record to the ``input_text`` / ``target_text`` pair.

    Args:
        example: Mapping-like record (anything with ``.get``) for one row.
        input_key: Name of the field holding the model input. Replace the
            default with the actual field name used in your JSONL.
        target_key: Name of the field holding the expected output. Replace
            the default with the actual field name used in your JSONL.

    Returns:
        A dict with the keys ``"input_text"`` and ``"target_text"``. A field
        that is missing or null is returned as an empty string.
    """
    inp = example.get(input_key)
    tgt = example.get(target_key)
    # A plain ``.get(key, "")`` only covers an absent key; a key that is
    # present with a null value (which a fixed-schema loader may produce for
    # fields missing from individual rows) would otherwise leak None through.
    return {
        "input_text": "" if inp is None else inp,
        "target_text": "" if tgt is None else tgt,
    }
# Read the JSON training file as a single "train" split.
raw_records = load_dataset(
    "json",
    data_files="trainingSet.json",
    split="train",
)

# Run prepare() over every record, then hold out 2% of the rows as a test split.
ds = raw_records.map(prepare).train_test_split(test_size=0.02)

# Persist the resulting splits to the local "processed_ds" directory.
ds.save_to_disk("processed_ds")
print("Saved processed_ds")