File size: 525 Bytes
f29d474 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 |
# Script to check dataset integrity
import os
# Location of the combined dataset file that check_dataset() opens. The path
# is relative, so it is resolved against the current working directory.
COMBINED_DATA_PATH = "data/cleaned/mk_combined_data.txt"
def check_dataset(path=None):
    """Report how many samples the combined dataset file contains.

    The file is read as UTF-8 text and split on blank lines (two consecutive
    newline characters); every chunk that is not purely whitespace counts as
    one sample. The result is printed to stdout and also returned.

    Args:
        path: File to inspect. When omitted or ``None``, the module-level
            ``COMBINED_DATA_PATH`` is used.

    Returns:
        The number of samples found, or ``None`` when ``path`` does not
        refer to an existing regular file.
    """
    if path is None:
        path = COMBINED_DATA_PATH

    # isfile() rather than exists(): a directory at this path would pass an
    # exists() check and then make open() raise instead of reporting the
    # dataset as missing.
    if not os.path.isfile(path):
        print("❌ Combined dataset not found! Run data/process_all_data.py")
        return None

    with open(path, "r", encoding="utf-8") as f:
        samples = [t for t in f.read().split('\n\n') if t.strip()]

    print(f"✅ Dataset contains {len(samples)} samples.")
    return len(samples)
# Run the dataset check only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    check_dataset()
|