# File size: 748 Bytes | 5eaff91
"""
Save the first N SmolTalk conversations to a JSON file.
Run from project root:
python -m scripts.save_smoltalk_samples
"""
import json
import os
from tasks.smoltalk import SmolTalk
def main(num_samples: int = 10, output_path: str = "./smoltalk_samples.json") -> None:
    """Save the first ``num_samples`` SmolTalk train conversations as JSON.

    Called with no arguments this writes the first 10 conversations to
    ``./smoltalk_samples.json``.

    Args:
        num_samples: Maximum number of conversations to save. Capped at the
            dataset length; zero or a negative value produces an empty list.
        output_path: Path of the JSON file to write. An existing file at this
            path is overwritten.
    """
    print("Loading SmolTalk train split...")
    smoltalk = SmolTalk(split="train")

    # Index explicitly instead of iterating: only len() and integer
    # indexing are relied on here.
    count = min(num_samples, len(smoltalk))
    samples = [smoltalk[i] for i in range(count)]

    with open(output_path, "w", encoding="utf-8") as f:
        # ensure_ascii=False writes non-ASCII characters as-is rather than
        # as \uXXXX escapes, which is why the file is opened as UTF-8.
        json.dump(samples, f, ensure_ascii=False, indent=2)

    abs_path = os.path.abspath(output_path)
    print(f"Saved {len(samples)} conversations to {abs_path}")
# Run main() only when this file is executed as a script (for example via
# `python -m`), not when it is imported as a module.
if __name__ == "__main__":
    main()
|