File size: 484 Bytes
5e0532d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
import glob
import os

def count_samples(pattern: str = "important/curated_data/*.jsonl") -> int:
    """Count the samples in every file matching *pattern* and print a report.

    Each non-blank line of a matched file is counted as one sample. A
    per-file count and a grand total are printed to standard output.

    Args:
        pattern: Glob pattern selecting the files to count. Relative
            patterns are resolved against the current working directory.

    Returns:
        The total number of samples across all matched files; 0 when no
        file matches the pattern.

    Raises:
        OSError: If a matched file cannot be opened or read.
        UnicodeDecodeError: If a matched file is not valid UTF-8.
    """
    # glob.glob() returns matches in arbitrary order; sort them so the
    # report is stable from run to run.
    files = sorted(glob.glob(pattern))
    print(f"Counting samples in {len(files)} files...")
    total = 0
    for f in files:
        with open(f, "r", encoding="utf-8") as file:
            # Blank or whitespace-only lines (e.g. a trailing empty line)
            # carry no record, so they must not inflate the count.
            count = sum(1 for line in file if line.strip())
        print(f"{os.path.basename(f)}: {count} samples")
        total += count
    print(f"Total curated samples: {total}")
    return total

# Script entry point: run the count only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    count_samples()