ORA / scripts /count_curated.py
Abdalkaderdev's picture
Initial ORA deployment
5e0532d
import glob
import os
def count_samples(pattern: str = "important/curated_data/*.jsonl") -> int:
    """Count the samples in every JSONL file matching *pattern*.

    Each non-blank line of a file is counted as one sample. A per-file
    count and the grand total are printed to stdout.

    Args:
        pattern: Glob pattern selecting the files to count. The default is
            a relative path, so it is resolved against the current working
            directory.

    Returns:
        The total number of samples across all matched files (0 when no
        file matches).
    """
    # glob.glob() returns paths in filesystem-dependent order; sort them so
    # the report is identical from run to run.
    files = sorted(glob.glob(pattern))
    print(f"Counting samples in {len(files)} files...")

    total = 0
    for path in files:
        with open(path, "r", encoding="utf-8") as handle:
            # Skip blank / whitespace-only lines (e.g. a trailing empty
            # line) so they do not inflate the sample count.
            count = sum(1 for line in handle if line.strip())
        print(f"{os.path.basename(path)}: {count} samples")
        total += count

    print(f"Total curated samples: {total}")
    return total
# Script entry point: run the count only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    count_samples()