Spaces:
Sleeping
Sleeping
| """ | |
| Run all Deepchecks validation suites | |
| This script executes all data validation checks: | |
| 1. Data Integrity Suite - validates training data quality | |
| 2. Train-Test Validation Suite - ensures proper train/test split | |
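| Usage: | |
| python tests/run_all_deepchecks.py (validates the cleaned data; default) | |
| python tests/run_all_deepchecks.py --original (validates the original data instead) | |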
| """ | |
| import sys | |
| from pathlib import Path | |
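| # Put the project root (the parent of this file's directory) first on sys.path | |
| # so the `tests.*` imports below resolve when this file is run as a script. | |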
| project_root = Path(__file__).resolve().parents[1] | |
| sys.path.insert(0, str(project_root)) | |
| from tests.test_data_integrity import ( | |
| run_data_integrity_suite, | |
| run_custom_integrity_checks, | |
| analyze_data_statistics | |
| ) | |
| from tests.test_train_test_validation import ( | |
| run_train_test_validation_suite, | |
| run_custom_train_test_checks, | |
| compare_distributions, | |
| validate_split_quality | |
| ) | |
def _run_phase(number, banner, steps):
    """Run one validation phase and report whether every step succeeded.

    Args:
        number: Phase number used in the success and error messages.
        banner: Heading line printed before the phase starts.
        steps: Zero-argument callables executed in order. A blank spacer
            is printed between consecutive steps.

    Returns:
        True if every step ran without raising, False otherwise.
    """
    import traceback

    print(banner)
    print("=" * 80)
    try:
        for position, step in enumerate(steps):
            if position:
                # Visual gap between the output of consecutive steps.
                print("\n")
            step()
    except Exception as exc:
        # Top-level boundary: keep the one-line summary on stdout, and also
        # emit the full traceback on stderr so the failing check can be
        # located instead of guessed from the message alone.
        print(f"\nError in Phase {number}: {exc}", flush=True)
        traceback.print_exc()
        return False
    else:
        print(f"\nPhase {number} completed successfully!")
        return True


def main(argv=None):
    """Run all Deepchecks validation suites and print a summary.

    Phase 1 runs the data-integrity steps and Phase 2 the train-test
    validation steps. Execution stops at the first phase that raises.

    Args:
        argv: Optional list of command-line arguments. When None (the
            default) argparse reads them from ``sys.argv``. The only
            option is ``--original``, which selects the original data
            instead of the cleaned data.

    Returns:
        True if both phases completed without raising, False otherwise.
    """
    import argparse

    parser = argparse.ArgumentParser(description='Run Deepchecks validation suites')
    parser.add_argument('--original', action='store_true',
                        help='Use original data instead of cleaned data')
    args = parser.parse_args(argv)
    use_cleaned = not args.original

    rule = "=" * 80
    print(rule)
    print(" DEEPCHECKS VALIDATION - COMPLETE SUITE")
    print(rule)
    print(f"\nUsing {'CLEANED' if use_cleaned else 'ORIGINAL'} data")
    print("Reports will be saved in: reports/deepchecks/")
    print("\n" + rule)

    # Each step is wrapped in a lambda so it is only invoked inside
    # _run_phase's try block, where a failure is reported per phase.
    phases = (
        (
            1,
            "\nPHASE 1: DATA INTEGRITY VALIDATION",
            (
                # Dataset statistics
                lambda: analyze_data_statistics(use_cleaned=use_cleaned),
                # Full integrity suite
                lambda: run_data_integrity_suite(save_output=True, use_cleaned=use_cleaned),
                # Custom integrity checks
                lambda: run_custom_integrity_checks(save_output=True, use_cleaned=use_cleaned),
            ),
        ),
        (
            2,
            "\n\nPHASE 2: TRAIN-TEST VALIDATION",
            (
                # Distribution comparison
                lambda: compare_distributions(use_cleaned=use_cleaned),
                # Split quality validation
                lambda: validate_split_quality(use_cleaned=use_cleaned),
                # Full train-test suite
                lambda: run_train_test_validation_suite(save_output=True, use_cleaned=use_cleaned),
                # Custom train-test checks
                lambda: run_custom_train_test_checks(save_output=True, use_cleaned=use_cleaned),
            ),
        ),
    )
    for number, banner, steps in phases:
        if not _run_phase(number, banner, steps):
            return False

    # Summary
    print("\n\n" + rule)
    print(" VALIDATION SUMMARY")
    print(rule)
    print("\nAll Deepchecks validation suites completed successfully!")
    print("\nGenerated Reports:")
    print(" - reports/deepchecks/data_integrity_suite_results.json")
    print(" - reports/deepchecks/train_test_validation_suite_results.json")
    print(" - reports/deepchecks/validation_summary.json")
    print("\nNext Steps:")
    print(" 1. Review the JSON reports for check results")
    print(" 2. Examine any warnings or failed checks")
    print(" 3. Address data quality issues if found")
    print(" 4. Document findings in your project documentation")
    print("\n" + rule)
    return True
if __name__ == "__main__":
    # Translate main()'s boolean outcome into a process exit status:
    # 0 when it returns a truthy value, 1 otherwise.
    exit_status = 0 if main() else 1
    sys.exit(exit_status)