""" Run all Deepchecks validation suites This script executes all data validation checks: 1. Data Integrity Suite - validates training data quality 2. Train-Test Validation Suite - ensures proper train/test split Usage: python tests/run_all_deepchecks.py """ import sys from pathlib import Path # Add project root to path project_root = Path(__file__).resolve().parents[1] sys.path.insert(0, str(project_root)) from tests.test_data_integrity import ( run_data_integrity_suite, run_custom_integrity_checks, analyze_data_statistics ) from tests.test_train_test_validation import ( run_train_test_validation_suite, run_custom_train_test_checks, compare_distributions, validate_split_quality ) def main(): """ Run all Deepchecks validation suites and generate reports. """ import argparse parser = argparse.ArgumentParser(description='Run Deepchecks validation suites') parser.add_argument('--original', action='store_true', help='Use original data instead of cleaned data') args = parser.parse_args() use_cleaned = not args.original print("="*80) print(" DEEPCHECKS VALIDATION - COMPLETE SUITE") print("="*80) print(f"\nUsing {'CLEANED' if use_cleaned else 'ORIGINAL'} data") print("Reports will be saved in: reports/deepchecks/") print("\n" + "="*80) # Phase 1: Data Integrity Checks print("\nPHASE 1: DATA INTEGRITY VALIDATION") print("="*80) try: # Dataset statistics analyze_data_statistics(use_cleaned=use_cleaned) # Run full integrity suite print("\n") integrity_result = run_data_integrity_suite(save_output=True, use_cleaned=use_cleaned) # Run custom integrity checks print("\n") custom_integrity_results = run_custom_integrity_checks(save_output=True, use_cleaned=use_cleaned) print("\nPhase 1 completed successfully!") except Exception as e: print(f"\nError in Phase 1: {str(e)}") return False # Phase 2: Train-Test Validation print("\n\nPHASE 2: TRAIN-TEST VALIDATION") print("="*80) try: # Distribution comparison compare_distributions(use_cleaned=use_cleaned) # Split quality validation print("\n") validate_split_quality(use_cleaned=use_cleaned) # Run full train-test suite print("\n") train_test_suite_result = run_train_test_validation_suite(save_output=True, use_cleaned=use_cleaned) # Run custom train-test checks print("\n") custom_train_test_results = run_custom_train_test_checks(save_output=True, use_cleaned=use_cleaned) print("\nPhase 2 completed successfully!") except Exception as e: print(f"\nError in Phase 2: {str(e)}") return False # Summary print("\n\n" + "="*80) print(" VALIDATION SUMMARY") print("="*80) print("\nAll Deepchecks validation suites completed successfully!") print("\nGenerated Reports:") print(" - reports/deepchecks/data_integrity_suite_results.json") print(" - reports/deepchecks/train_test_validation_suite_results.json") print(" - reports/deepchecks/validation_summary.json") print("\nNext Steps:") print(" 1. Review the JSON reports for check results") print(" 2. Examine any warnings or failed checks") print(" 3. Address data quality issues if found") print(" 4. Document findings in your project documentation") print("\n" + "="*80) return True if __name__ == "__main__": success = main() sys.exit(0 if success else 1)