| """ | |
| Save the first N SmolTalk conversations to a JSON file. | |
| Run from project root: | |
| python -m scripts.save_smoltalk_samples | |
| """ | |
| import json | |
| import os | |
| from tasks.smoltalk import SmolTalk | |
def main(
    num_samples: int = 10,
    output_path: str = "./smoltalk_samples.json",
) -> None:
    """Save the first ``num_samples`` SmolTalk train conversations as JSON.

    Args:
        num_samples: Maximum number of conversations to save. Fewer are
            written when the train split holds fewer items than this.
        output_path: Destination file for the JSON array. A relative path
            is resolved against the current working directory.
    """
    print("Loading SmolTalk train split...")
    smoltalk = SmolTalk(split="train")

    # Clamp to the dataset size so every index used below is in range.
    count = min(num_samples, len(smoltalk))
    # Fetch items one index at a time: only len() and integer indexing
    # are relied on here.
    samples = [smoltalk[i] for i in range(count)]

    # ensure_ascii=False writes non-ASCII text as-is into the UTF-8 file
    # instead of \uXXXX escapes.
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(samples, f, ensure_ascii=False, indent=2)

    abs_path = os.path.abspath(output_path)
    print(f"Saved {len(samples)} conversations to {abs_path}")
# Run the export only when executed as a script (e.g. via ``python -m``),
# not when this module is imported.
if __name__ == "__main__":
    main()