""" Test script for data quality analysis module. Run this to verify the quality checks work correctly and generate a quality report for the MNIST dataset. """ import sys import json from pathlib import Path # Add project root to path project_root = Path(__file__).parent.parent sys.path.insert(0, str(project_root)) from scripts.data_loader import MnistDataloader from scripts.data_quality import generate_quality_report, print_quality_summary def main(): """Run quality checks and save report.""" print("Loading MNIST dataset...") data_path = project_root / "data" / "raw" loader = MnistDataloader( str(data_path / "train-images.idx3-ubyte"), str(data_path / "train-labels.idx1-ubyte"), str(data_path / "t10k-images.idx3-ubyte"), str(data_path / "t10k-labels.idx1-ubyte") ) (x_train, y_train), (x_test, y_test) = loader.load_data() print("✓ Dataset loaded\n") # Generate quality report print("Running quality checks...") report = generate_quality_report((x_train, y_train), (x_test, y_test)) print("✓ Quality checks complete\n") # Print summary print_quality_summary(report) # Save report as JSON output_path = project_root / "data" / "quality_report.json" with open(output_path, 'w') as f: json.dump(report, f, indent=2) print(f"✓ Quality report saved to: {output_path}") return 0 if report['summary']['all_checks_pass'] else 1 if __name__ == "__main__": exit_code = main() sys.exit(exit_code)