File size: 525 Bytes
f29d474
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18

# Script to check that the combined dataset file exists and report how many
# blank-line-separated samples it contains.

import os

# Location of the combined dataset text file that check_dataset() reads.
# The path is relative, so it is resolved against the current working directory.
COMBINED_DATA_PATH = "data/cleaned/mk_combined_data.txt"

def check_dataset(path=None):
    """Report how many samples the combined dataset file contains.

    The file is read as UTF-8 text and split on blank lines (``"\\n\\n"``);
    every chunk that is not purely whitespace counts as one sample. A status
    line is printed either way.

    Args:
        path: Optional path of the dataset file to inspect. When omitted
            (``None``), ``COMBINED_DATA_PATH`` is used.

    Returns:
        The number of samples found, or ``None`` when the file does not exist.
    """
    if path is None:
        path = COMBINED_DATA_PATH

    # Open directly instead of testing os.path.exists() first: the file could
    # disappear between the check and the open, and one attempt is enough.
    try:
        with open(path, "r", encoding="utf-8") as f:
            text = f.read()
    except (FileNotFoundError, NotADirectoryError):
        # NotADirectoryError covers a path component that is a regular file,
        # which os.path.exists() also reported as "missing".
        print("❌ Combined dataset not found! Run data/process_all_data.py")
        return None

    samples = [t for t in text.split('\n\n') if t.strip()]
    print(f"✅ Dataset contains {len(samples)} samples.")
    return len(samples)

# Run the check only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    check_dataset()