Spaces:
Sleeping
Sleeping
File size: 2,880 Bytes
3795605 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 | """CLI dataset validation script.
Invokes the DatasetValidator with settings-based configuration and writes
a validation report to artifacts/reports/data_validation_report.md.
Exit codes:
0 — All validation checks passed.
1 — One or more validation checks failed.
2 — Setup failure (missing dataset directory, configuration error) or an
error raised while running validation or writing the report.
"""
import sys
from pathlib import Path
# Add the AI_Services_V2 root (the parent of this script's directory) to the
# front of sys.path so the `app.*` imports below resolve when the script is
# run directly.
_SCRIPT_DIR = Path(__file__).resolve().parent
# Also the base against which relative dataset/report paths are resolved
# further down in this file.
_PROJECT_ROOT = _SCRIPT_DIR.parent
sys.path.insert(0, str(_PROJECT_ROOT))
from app.core.config import settings
from app.core.exceptions import DatasetError
from app.data.loader import DatasetLoader
from app.data.validator import DatasetValidator
def _resolve_from_project_root(raw_path: "str | Path") -> Path:
    """Return *raw_path* as a Path, anchoring relative paths at the project root.

    Absolute paths are returned unchanged; relative paths are joined onto
    ``_PROJECT_ROOT`` and resolved.
    """
    path = Path(raw_path)
    if path.is_absolute():
        return path
    return (_PROJECT_ROOT / path).resolve()


def main() -> int:
    """Run dataset validation and return the process exit code.

    The dataset directory and the reports directory are read from
    ``settings``; relative values are interpreted relative to the project
    root. The report is written to ``data_validation_report.md`` inside the
    reports directory and a summary is printed to stdout.

    Returns:
        0 when every validation check passed.
        1 when at least one validation check failed.
        2 when setup failed (dataset directory missing, loader/metadata
        error) or an exception was raised while validating or writing the
        report.
    """
    # --- Setup: locate the dataset and load its metadata -------------------
    try:
        dataset_path = _resolve_from_project_root(settings.dataset_dir)
        # is_dir() rather than exists(): a regular file at this location is
        # not a usable dataset directory and must be rejected here as well.
        if not dataset_path.is_dir():
            print(f"ERROR: Dataset directory not found: {dataset_path}")
            return 2
        loader = DatasetLoader(dataset_path)
        metadata = loader.load_metadata()
    except DatasetError as exc:
        print(f"ERROR: Setup failure — {exc}")
        return 2
    except Exception as exc:
        # Top-level boundary: convert anything unexpected into exit code 2
        # instead of a traceback.
        print(f"ERROR: Unexpected setup failure — {exc}")
        return 2

    # --- Validation: run all checks, write the report, print a summary -----
    try:
        validator = DatasetValidator(loader, metadata)
        report = validator.run_all()

        reports_dir = _resolve_from_project_root(settings.reports_dir)
        report_path = reports_dir / "data_validation_report.md"
        validator.write_report(report, report_path)

        # Print summary to stdout
        if report.passed:
            print(f"PASSED: All {report.checks_run} validation checks passed.")
            print(f"Report written to: {report_path}")
            return 0

        print(
            f"FAILED: {report.checks_run - report.checks_passed}/{report.checks_run} "
            f"checks failed with {len(report.issues)} issue(s)."
        )
        for issue in report.issues:
            # The column is optional; omit the parenthesised suffix when unset.
            col_info = f" ({issue.column})" if issue.column else ""
            print(f" - [{issue.severity.upper()}] {issue.table}{col_info}: {issue.message}")
        print(f"\nReport written to: {report_path}")
        return 1
    except DatasetError as exc:
        print(f"ERROR: Validation failure — {exc}")
        return 2
    except Exception as exc:
        # Top-level boundary: report and map to exit code 2.
        print(f"ERROR: Unexpected failure during validation — {exc}")
        return 2
if __name__ == "__main__":
    # Hand main()'s return value to the interpreter as the process exit code.
    raise SystemExit(main())
|