"""CLI dataset validation script.

Invokes the DatasetValidator with settings-based configuration and writes a
validation report named ``data_validation_report.md`` into the directory
given by ``settings.reports_dir``.

The PASSED/FAILED summary is printed to stdout; ``ERROR:`` diagnostics are
printed to stderr so they do not mix with the summary when stdout is piped.

Exit codes:
    0 — All validation checks passed.
    1 — One or more validation checks failed.
    2 — Setup failure (missing dataset directory, configuration error),
        or any error raised while running validation / writing the report.
"""

import sys
from pathlib import Path

# Add AI_Services_V2 root to sys.path so imports work when run directly
_SCRIPT_DIR = Path(__file__).resolve().parent
_PROJECT_ROOT = _SCRIPT_DIR.parent
sys.path.insert(0, str(_PROJECT_ROOT))

# These imports must come after the sys.path tweak above.
from app.core.config import settings  # noqa: E402
from app.core.exceptions import DatasetError  # noqa: E402
from app.data.loader import DatasetLoader  # noqa: E402
from app.data.validator import DatasetValidator  # noqa: E402


def _resolve_from_project_root(raw_path) -> Path:
    """Return *raw_path* as a Path, anchoring relative paths at the project root.

    Absolute paths are returned as given (not resolved); relative paths are
    joined onto ``_PROJECT_ROOT`` and resolved.
    """
    path = Path(raw_path)
    if path.is_absolute():
        return path
    return (_PROJECT_ROOT / path).resolve()


def _print_error(message: str) -> None:
    """Write a diagnostic line to stderr."""
    print(message, file=sys.stderr)


def main() -> int:
    """Run dataset validation and return exit code.

    Returns:
        0 when every check passed, 1 when at least one check failed, and 2
        when setup failed or an exception was raised during validation.
    """
    # --- Setup: locate the dataset and load its metadata -------------------
    try:
        dataset_path = _resolve_from_project_root(settings.dataset_dir)

        # is_dir() rather than exists(): a regular file at this path is not a
        # usable dataset directory and should be reported here.
        if not dataset_path.is_dir():
            _print_error(f"ERROR: Dataset directory not found: {dataset_path}")
            return 2

        loader = DatasetLoader(dataset_path)
        metadata = loader.load_metadata()
    except DatasetError as exc:
        _print_error(f"ERROR: Setup failure — {exc}")
        return 2
    except Exception as exc:  # top-level boundary: convert to an exit code
        _print_error(f"ERROR: Unexpected setup failure — {exc}")
        return 2

    # --- Validation: run all checks and write the report -------------------
    try:
        validator = DatasetValidator(loader, metadata)
        report = validator.run_all()

        reports_dir = _resolve_from_project_root(settings.reports_dir)
        report_path = reports_dir / "data_validation_report.md"
        validator.write_report(report, report_path)

        # Print summary to stdout
        if report.passed:
            print(f"PASSED: All {report.checks_run} validation checks passed.")
            print(f"Report written to: {report_path}")
            return 0

        print(
            f"FAILED: {report.checks_run - report.checks_passed}/{report.checks_run} "
            f"checks failed with {len(report.issues)} issue(s)."
        )
        for issue in report.issues:
            col_info = f" ({issue.column})" if issue.column else ""
            print(f"  - [{issue.severity.upper()}] {issue.table}{col_info}: {issue.message}")
        print(f"\nReport written to: {report_path}")
        return 1
    except DatasetError as exc:
        _print_error(f"ERROR: Validation failure — {exc}")
        return 2
    except Exception as exc:  # top-level boundary: convert to an exit code
        _print_error(f"ERROR: Unexpected failure during validation — {exc}")
        return 2


if __name__ == "__main__":
    sys.exit(main())