# JustinTX's picture
# Add files using upload-large-folder tool
# d28330f verified
import os

from datasets import load_dataset
# Load the "JoeYing/ReTool-SFT" dataset, then keep only its "train" split.
ds = load_dataset("JoeYing/ReTool-SFT")
ds = ds["train"]
# Roles accepted in a conversation turn. A tuple (not a set) keeps the
# membership test an ``==`` comparison, so unhashable values are rejected with
# ValueError rather than TypeError.
_ALLOWED_ROLES = ("user", "assistant", "system")


def _validated_role(role):
    """Return *role* unchanged, or raise ValueError if it is not an allowed role."""
    if role in _ALLOWED_ROLES:
        return role
    raise ValueError(f"Unknown role: {role}")


def convert(sample):
    """Reduce one dataset row to a ``{"messages": [...]}`` record.

    Every turn in ``sample["messages"]`` is rebuilt with only its ``"role"``
    and ``"content"`` entries; any other keys present on a turn are dropped.
    Turn order is preserved.

    Args:
        sample: A mapping with a ``"messages"`` entry that iterates over turn
            mappings, each providing ``"role"`` and ``"content"``.

    Returns:
        A dict with the single key ``"messages"`` mapped to a list of
        ``{"role": ..., "content": ...}`` dicts.

    Raises:
        ValueError: If a turn's role is not ``"user"``, ``"assistant"`` or
            ``"system"``.
    """
    conversations = sample["messages"]
    messages = [
        {
            "role": _validated_role(turn["role"]),
            "content": turn["content"],
        }
        for turn in conversations
    ]
    return {"messages": messages}
# Run ``convert`` over the dataset via ``map``.
ds = ds.map(convert)

# Ensure the destination directory exists before writing the Parquet file,
# so the script does not depend on ./data/retool having been created already.
output_path = "./data/retool/ReTool-SFT.parquet"
os.makedirs(os.path.dirname(output_path), exist_ok=True)
ds.to_parquet(output_path)