Spaces:
Sleeping
Sleeping
| """CLI dataset validation script. | |
| Invokes the DatasetValidator with settings-based configuration and writes | |
| a validation report to artifacts/reports/data_validation_report.md. | |
| Exit codes: | |
| 0 — All validation checks passed. | |
| 1 — One or more validation checks failed. | |
| 2 — Setup failure (missing dataset directory, configuration error). | |
| """ | |
| import sys | |
| from pathlib import Path | |
| # Add AI_Services_V2 root to sys.path so imports work when run directly | |
| _SCRIPT_DIR = Path(__file__).resolve().parent | |
| _PROJECT_ROOT = _SCRIPT_DIR.parent | |
| sys.path.insert(0, str(_PROJECT_ROOT)) | |
| from app.core.config import settings | |
| from app.core.exceptions import DatasetError | |
| from app.data.loader import DatasetLoader | |
| from app.data.validator import DatasetValidator | |
def _resolve_under_project_root(raw_path) -> Path:
    """Return *raw_path* as a Path, anchoring relative paths at the project root.

    Absolute paths are returned unchanged; relative paths are joined onto
    ``_PROJECT_ROOT`` and resolved.
    """
    path = Path(raw_path)
    if path.is_absolute():
        return path
    return (_PROJECT_ROOT / path).resolve()


def main() -> int:
    """Run dataset validation and return the process exit code.

    The dataset location is read from ``settings.dataset_dir`` and the report
    is written to ``data_validation_report.md`` inside ``settings.reports_dir``.
    Relative settings paths are interpreted relative to the project root.

    Returns:
        0 when the validation report is marked as passed.
        1 when the report is not marked as passed; each issue is printed.
        2 when the dataset path does not exist, or when any exception is
        raised during setup or during validation/report writing.
    """
    # Phase 1: locate the dataset and load its metadata.
    try:
        dataset_path = _resolve_under_project_root(settings.dataset_dir)
        if not dataset_path.exists():
            print(f"ERROR: Dataset directory not found: {dataset_path}")
            return 2
        loader = DatasetLoader(dataset_path)
        metadata = loader.load_metadata()
    except DatasetError as exc:
        print(f"ERROR: Setup failure — {exc}")
        return 2
    except Exception as exc:  # CLI boundary: report and map to an exit code
        print(f"ERROR: Unexpected setup failure — {exc}")
        return 2

    # Phase 2: run the checks, persist the report, and summarise on stdout.
    try:
        validator = DatasetValidator(loader, metadata)
        report = validator.run_all()

        reports_dir = _resolve_under_project_root(settings.reports_dir)
        report_path = reports_dir / "data_validation_report.md"
        validator.write_report(report, report_path)

        if report.passed:
            print(f"PASSED: All {report.checks_run} validation checks passed.")
            print(f"Report written to: {report_path}")
            return 0

        failed_checks = report.checks_run - report.checks_passed
        print(
            f"FAILED: {failed_checks}/{report.checks_run} "
            f"checks failed with {len(report.issues)} issue(s)."
        )
        for issue in report.issues:
            col_info = f" ({issue.column})" if issue.column else ""
            print(f" - [{issue.severity.upper()}] {issue.table}{col_info}: {issue.message}")
        print(f"\nReport written to: {report_path}")
        return 1
    except DatasetError as exc:
        print(f"ERROR: Validation failure — {exc}")
        return 2
    except Exception as exc:  # CLI boundary: report and map to an exit code
        print(f"ERROR: Unexpected failure during validation — {exc}")
        return 2
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    raise SystemExit(main())