|
|
""" |
|
|
Run all Deepchecks validation suites |
|
|
|
|
|
This script executes all data validation checks: |
|
|
1. Data Integrity Suite - validates training data quality |
|
|
2. Train-Test Validation Suite - ensures proper train/test split |
|
|
|
|
|
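Usage:
    python tests/run_all_deepchecks.py             # validate the cleaned data (default)
    python tests/run_all_deepchecks.py --original  # validate the original data instead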
|
|
""" |
|
|
|
|
|
import sys |
|
|
from pathlib import Path |
|
|
|
|
|
|
|
|
# Put the directory one level above this script's folder at the front of
# sys.path so the `tests.*` imports that follow resolve when the file is
# executed directly as a script.
project_root = Path(__file__).resolve().parents[1]
sys.path.insert(0, str(project_root))
|
|
|
|
|
from tests.test_data_integrity import ( |
|
|
run_data_integrity_suite, |
|
|
run_custom_integrity_checks, |
|
|
analyze_data_statistics |
|
|
) |
|
|
from tests.test_train_test_validation import ( |
|
|
run_train_test_validation_suite, |
|
|
run_custom_train_test_checks, |
|
|
compare_distributions, |
|
|
validate_split_quality |
|
|
) |
|
|
|
|
|
|
|
|
def main(argv=None):
    """
    Run all Deepchecks validation suites and generate reports.

    Phase 1 runs the data-integrity checks and Phase 2 runs the train-test
    validation checks. Progress banners are printed to stdout. The first
    phase that raises stops the run: the error message is printed, the
    traceback is written to stderr, and False is returned.

    Args:
        argv: Optional list of command-line arguments, without the program
            name. When None (the default), argparse reads sys.argv[1:],
            which is the behaviour of a normal command-line invocation.

    Returns:
        True if both phases completed without raising, False otherwise.

    Raises:
        SystemExit: Raised by argparse for ``--help`` or invalid arguments.
    """
    import argparse
    import traceback

    parser = argparse.ArgumentParser(description='Run Deepchecks validation suites')
    parser.add_argument('--original', action='store_true',
                        help='Use original data instead of cleaned data')
    args = parser.parse_args(argv)

    # Cleaned data is the default; --original opts out of it.
    use_cleaned = not args.original

    print("="*80)
    print(" DEEPCHECKS VALIDATION - COMPLETE SUITE")
    print("="*80)
    print(f"\nUsing {'CLEANED' if use_cleaned else 'ORIGINAL'} data")
    print("Reports will be saved in: reports/deepchecks/")
    print("\n" + "="*80)

    print("\nPHASE 1: DATA INTEGRITY VALIDATION")
    print("="*80)

    try:
        analyze_data_statistics(use_cleaned=use_cleaned)

        print("\n")
        run_data_integrity_suite(save_output=True, use_cleaned=use_cleaned)

        print("\n")
        run_custom_integrity_checks(save_output=True, use_cleaned=use_cleaned)

        print("\nPhase 1 completed successfully!")

    except Exception as e:
        # Top-level boundary: report the failure and signal it through the
        # return value, but keep the traceback so the cause can be diagnosed.
        print(f"\nError in Phase 1: {e}")
        traceback.print_exc()
        return False

    print("\n\nPHASE 2: TRAIN-TEST VALIDATION")
    print("="*80)

    try:
        compare_distributions(use_cleaned=use_cleaned)

        print("\n")
        validate_split_quality(use_cleaned=use_cleaned)

        print("\n")
        run_train_test_validation_suite(save_output=True, use_cleaned=use_cleaned)

        print("\n")
        run_custom_train_test_checks(save_output=True, use_cleaned=use_cleaned)

        print("\nPhase 2 completed successfully!")

    except Exception as e:
        # Same reporting policy as Phase 1.
        print(f"\nError in Phase 2: {e}")
        traceback.print_exc()
        return False

    print("\n\n" + "="*80)
    print(" VALIDATION SUMMARY")
    print("="*80)

    print("\nAll Deepchecks validation suites completed successfully!")
    print("\nGenerated Reports:")
    print(" - reports/deepchecks/data_integrity_suite_results.json")
    print(" - reports/deepchecks/train_test_validation_suite_results.json")
    print(" - reports/deepchecks/validation_summary.json")

    print("\nNext Steps:")
    print(" 1. Review the JSON reports for check results")
    print(" 2. Examine any warnings or failed checks")
    print(" 3. Address data quality issues if found")
    print(" 4. Document findings in your project documentation")

    print("\n" + "="*80)

    return True
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Exit status 0 when main() reports success, 1 otherwise.
    sys.exit(int(not main()))
|
|
|