File size: 748 Bytes
5eaff91
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
"""
Save the first N SmolTalk conversations to a JSON file.
Run from project root:
    python -m scripts.save_smoltalk_samples
"""

import json
import os

from tasks.smoltalk import SmolTalk


def main(num_samples: int = 10, output_path: str = "./smoltalk_samples.json") -> None:
    """Write the leading conversations of the SmolTalk train split to a JSON file.

    Args:
        num_samples: Maximum number of conversations to save, taken in order
            from index 0. Fewer are written when the split is smaller; a
            value of zero or less writes an empty list.
        output_path: Destination JSON file. It is opened in write mode, so an
            existing file at this path is overwritten.
    """
    print("Loading SmolTalk train split...")
    smoltalk = SmolTalk(split="train")

    # Clamp to the dataset length so we never index past the end of the split.
    count = min(num_samples, len(smoltalk))
    samples = [smoltalk[i] for i in range(count)]

    # ensure_ascii=False writes non-ASCII characters verbatim rather than as
    # \uXXXX escapes, hence the explicit UTF-8 encoding on the file handle.
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(samples, f, ensure_ascii=False, indent=2)

    abs_path = os.path.abspath(output_path)
    print(f"Saved {len(samples)} conversations to {abs_path}")


# Run the export only when this file is executed as a script (for example via
# `python -m`), not when it is imported as a module.
if __name__ == "__main__":
    main()