File size: 694 Bytes
d28330f | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 | from datasets import load_dataset
# Load the dataset identified by "JoeYing/ReTool-SFT" and keep its "train" entry.
ds = load_dataset("JoeYing/ReTool-SFT")["train"]
def convert(sample):
    """Rebuild a sample's ``messages`` list keeping only role and content.

    Each turn found in ``sample["messages"]`` is reduced to a dict with the
    two keys ``"role"`` and ``"content"``; any other keys on the turn are
    not carried over.

    Args:
        sample: Mapping with a ``"messages"`` entry, an iterable of turns
            that each provide ``"role"`` and ``"content"``.

    Returns:
        A dict ``{"messages": [...]}`` holding the rebuilt turns in their
        original order.

    Raises:
        ValueError: If a turn's role is not "user", "assistant" or "system".
    """
    known_roles = ("user", "assistant", "system")

    def _checked_role(role):
        # Accepted roles map to themselves; anything else is rejected.
        for known in known_roles:
            if role == known:
                return known
        raise ValueError(f"Unknown role: {role}")

    rebuilt = []
    for turn in sample["messages"]:
        entry = {
            "role": _checked_role(turn["role"]),
            "content": turn["content"],
        }
        rebuilt.append(entry)

    return {"messages": rebuilt}
# Run convert over the dataset and rebind ds to the mapped result.
ds = ds.map(convert)
# Write the converted dataset to a Parquet file at this relative path.
# NOTE(review): the ./data/retool/ directory may need to exist beforehand —
# confirm for the installed `datasets` version.
ds.to_parquet("./data/retool/ReTool-SFT.parquet")
|