nanochat / scripts /save_smoltalk_samples.py
A113NW3I's picture
Upload folder using huggingface_hub
5eaff91 verified
"""
Save the first N SmolTalk conversations to a JSON file.
Run from project root:
python -m scripts.save_smoltalk_samples
"""
import json
import os
from tasks.smoltalk import SmolTalk
def main(num_samples: int = 10, output_path: str = "./smoltalk_samples.json") -> None:
    """Save the first ``num_samples`` SmolTalk train conversations as JSON.

    Args:
        num_samples: Upper bound on how many conversations to write. If the
            train split reports fewer items, only that many are written.
        output_path: Destination file for the JSON array. Its parent
            directory is created if it does not exist yet.

    The function prints a progress line before loading and a summary line
    (count and absolute path) after the file has been written.
    """
    print("Loading SmolTalk train split...")
    smoltalk = SmolTalk(split="train")

    # Take at most num_samples items, bounded by the length the split reports.
    count = min(num_samples, len(smoltalk))
    samples = [smoltalk[i] for i in range(count)]

    abs_path = os.path.abspath(output_path)
    # A caller-supplied path may point into a directory that is not there yet.
    os.makedirs(os.path.dirname(abs_path), exist_ok=True)

    with open(output_path, "w", encoding="utf-8") as f:
        # ensure_ascii=False keeps non-ASCII text readable instead of \u escapes.
        json.dump(samples, f, ensure_ascii=False, indent=2)

    print(f"Saved {len(samples)} conversations to {abs_path}")
# Script entry point: run the export only when executed directly
# (e.g. `python -m scripts.save_smoltalk_samples`), not on import.
if __name__ == "__main__":
    main()