File size: 694 Bytes
d28330f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
import os

from datasets import load_dataset

# Load the "JoeYing/ReTool-SFT" dataset and keep only its "train" split.
ds = load_dataset("JoeYing/ReTool-SFT")["train"]


def convert(sample):
    """Rebuild a sample's ``messages`` list with only ``role`` and ``content``.

    Every turn in ``sample["messages"]`` is copied into a new dict holding
    just its role and content; any other keys on the turn are dropped.

    Args:
        sample: Mapping with a ``"messages"`` entry that iterates over turns,
            each providing ``"role"`` and ``"content"``.

    Returns:
        A dict with a single ``"messages"`` key holding the rebuilt list.

    Raises:
        ValueError: If a turn's role is not ``user``, ``assistant`` or
            ``system``.
    """
    known_roles = ("user", "assistant", "system")

    def checked_role(value):
        # Hand back the canonical role string; reject anything unrecognised.
        for candidate in known_roles:
            if value == candidate:
                return candidate
        raise ValueError(f"Unknown role: {value}")

    rebuilt = []
    for entry in sample["messages"]:
        # Validate the role before touching the content, so a bad role is
        # reported even when the turn is otherwise malformed.
        role = checked_role(entry["role"])
        rebuilt.append({"role": role, "content": entry["content"]})

    return {"messages": rebuilt}


# Run `convert` over the dataset via `map`, replacing `ds` with the result.
ds = ds.map(convert)

# Make sure the output directory exists before writing. Whether `to_parquet`
# creates missing parent directories appears to depend on the installed
# `datasets` version, so create it explicitly (no-op if it already exists).
output_path = "./data/retool/ReTool-SFT.parquet"
os.makedirs(os.path.dirname(output_path), exist_ok=True)
ds.to_parquet(output_path)