File size: 484 Bytes
d632b3b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
from huggingface_hub import login
from datasets import load_dataset


def prepare(example):
    """Map one raw record onto the ``input_text`` / ``target_text`` schema.

    Args:
        example: Mapping for a single record; fields are read with ``.get``.

    Returns:
        A dict with ``input_text`` taken from ``"prompt"`` and
        ``target_text`` taken from ``"completion"``. A field that is
        missing or ``None`` is returned as ``""``.
    """
    # Replace these keys with the real field names of your JSONL file.
    prompt = example.get("prompt")
    completion = example.get("completion")
    # A key that is present but holds None bypasses a ``.get`` default, so
    # normalise explicitly. NOTE(review): rows lacking a field presumably
    # surface as None once loaded under a shared schema -- confirm.
    return {
        "input_text": "" if prompt is None else prompt,
        "target_text": "" if completion is None else completion,
    }

# Pipeline: read the JSON file, add the normalised columns produced by
# ``prepare``, then hold out 2% of the rows as a test split.
# No seed is passed to ``train_test_split`` here.
ds = (
    load_dataset("json", data_files="trainingSet.json", split="train")
    .map(prepare)
    .train_test_split(test_size=0.02)
)

# Write the resulting splits to the ``processed_ds`` directory.
ds.save_to_disk("processed_ds")
print("Saved processed_ds")