Spaces:
Sleeping
Sleeping
| """ | |
| Test script for data quality analysis module. | |
| Run this to verify the quality checks work correctly and generate | |
| a quality report for the MNIST dataset. | |
| """ | |
| import sys | |
| import json | |
| from pathlib import Path | |
# Put the parent of this file's directory at the front of sys.path.
# This must run before the `scripts.*` imports below; presumably the
# `scripts` package lives under that directory (verify against the repo
# layout). `project_root` is also used by main() to locate the data files.
project_root = Path(__file__).parent.parent
sys.path.insert(0, str(project_root))
| from scripts.data_loader import MnistDataloader | |
| from scripts.data_quality import generate_quality_report, print_quality_summary | |
def main():
    """Load MNIST, run the quality checks, and persist the report.

    The four raw IDX files are read from ``<project_root>/data/raw``, the
    report produced by ``generate_quality_report`` is summarised on stdout
    and written as indented JSON to ``<project_root>/data/quality_report.json``.

    Returns:
        0 when ``report['summary']['all_checks_pass']`` is truthy, otherwise 1
        (intended for use as the process exit status).
    """
    print("Loading MNIST dataset...")
    raw_dir = project_root / "data" / "raw"
    # Order matters: these are passed positionally to MnistDataloader.
    idx_files = (
        "train-images.idx3-ubyte",
        "train-labels.idx1-ubyte",
        "t10k-images.idx3-ubyte",
        "t10k-labels.idx1-ubyte",
    )
    loader = MnistDataloader(*(str(raw_dir / name) for name in idx_files))
    (train_images, train_labels), (test_images, test_labels) = loader.load_data()
    print("✓ Dataset loaded\n")

    # Build the quality report from both splits.
    print("Running quality checks...")
    train_split = (train_images, train_labels)
    test_split = (test_images, test_labels)
    report = generate_quality_report(train_split, test_split)
    print("✓ Quality checks complete\n")

    # Human-readable summary on stdout.
    print_quality_summary(report)

    # Machine-readable copy on disk.
    output_path = project_root / "data" / "quality_report.json"
    with output_path.open('w') as report_file:
        json.dump(report, report_file, indent=2)
    print(f"✓ Quality report saved to: {output_path}")

    if report['summary']['all_checks_pass']:
        return 0
    return 1
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())