Spaces:
Sleeping
Sleeping
| import glob | |
| import os | |
def count_samples(pattern: str = "important/curated_data/*.jsonl") -> int:
    """Count the samples in every file matching *pattern*.

    Each non-blank line of a matching file is treated as one sample. A
    per-file count and the grand total are printed, and the total is also
    returned so callers can use it programmatically.

    Args:
        pattern: Glob pattern selecting the files to count. Defaults to the
            curated-data location this script was originally hard-coded to.

    Returns:
        The number of non-blank lines across all matching files (0 when
        nothing matches).

    Raises:
        OSError: If a matching file cannot be opened.
        UnicodeDecodeError: If a matching file is not valid UTF-8.
    """
    # glob.glob makes no ordering guarantee; sort so the report is stable.
    files = sorted(glob.glob(pattern))
    print(f"Counting samples in {len(files)} files...")

    total = 0
    for filename in files:
        with open(filename, "r", encoding="utf-8") as handle:
            # Blank lines (e.g. a trailing empty line) are not records,
            # so they must not inflate the sample count.
            count = sum(1 for line in handle if line.strip())
        print(f"{os.path.basename(filename)}: {count} samples")
        total += count

    print(f"Total curated samples: {total}")
    return total
# Run the count only when executed as a script, not when imported.
if __name__ == "__main__":
    count_samples()