File size: 2,880 Bytes
3795605
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
"""CLI dataset validation script.

Invokes the DatasetValidator with settings-based configuration and writes
a validation report to artifacts/reports/data_validation_report.md.

Exit codes:
    0 — All validation checks passed.
    1 — One or more validation checks failed.
    2 — Setup failure (missing dataset directory, configuration error) or an
        error raised while running validation or writing the report.
"""

import sys
from pathlib import Path

# Put the AI_Services_V2 project root (the parent of this script's directory)
# at the front of sys.path so the `app.*` imports below resolve when the script
# is run directly. This must stay above those imports.
_SCRIPT_DIR = Path(__file__).resolve().parent
_PROJECT_ROOT = _SCRIPT_DIR.parent
sys.path.insert(0, str(_PROJECT_ROOT))

from app.core.config import settings
from app.core.exceptions import DatasetError
from app.data.loader import DatasetLoader
from app.data.validator import DatasetValidator


def _resolve_from_project_root(raw_path) -> Path:
    """Return *raw_path* as a Path, anchoring relative paths at the project root."""
    path = Path(raw_path)
    if not path.is_absolute():
        path = (_PROJECT_ROOT / path).resolve()
    return path


def main() -> int:
    """Run dataset validation and return the process exit code.

    Resolves ``settings.dataset_dir`` (relative paths are anchored at the
    project root), loads the dataset metadata, runs all validator checks and
    writes the report to ``data_validation_report.md`` inside
    ``settings.reports_dir``. The pass/fail summary is printed to stdout;
    error diagnostics are printed to stderr.

    Returns:
        0 when all checks passed, 1 when at least one check failed, 2 when
        setup failed or an exception interrupted validation or report writing.
    """
    # --- Setup: locate the dataset and load its metadata -------------------
    try:
        dataset_path = _resolve_from_project_root(settings.dataset_dir)

        # is_dir() rather than exists(): a regular file at this location is
        # not a usable dataset directory either.
        if not dataset_path.is_dir():
            print(
                f"ERROR: Dataset directory not found: {dataset_path}",
                file=sys.stderr,
            )
            return 2

        loader = DatasetLoader(dataset_path)
        metadata = loader.load_metadata()

    except DatasetError as exc:
        print(f"ERROR: Setup failure — {exc}", file=sys.stderr)
        return 2
    except Exception as exc:
        # Top-level CLI boundary: turn anything unexpected into exit code 2.
        print(f"ERROR: Unexpected setup failure — {exc}", file=sys.stderr)
        return 2

    # --- Validation: run the checks and write the report -------------------
    try:
        validator = DatasetValidator(loader, metadata)
        report = validator.run_all()

        report_path = (
            _resolve_from_project_root(settings.reports_dir)
            / "data_validation_report.md"
        )
        # write_report() is not visible from this script, so make sure the
        # target directory exists instead of relying on it to create it.
        report_path.parent.mkdir(parents=True, exist_ok=True)

        validator.write_report(report, report_path)

        # Print summary to stdout
        if report.passed:
            print(f"PASSED: All {report.checks_run} validation checks passed.")
            print(f"Report written to: {report_path}")
            return 0

        failed_checks = report.checks_run - report.checks_passed
        print(
            f"FAILED: {failed_checks}/{report.checks_run} "
            f"checks failed with {len(report.issues)} issue(s)."
        )
        for issue in report.issues:
            # The column is optional; only show it when the issue carries one.
            col_info = f" ({issue.column})" if issue.column else ""
            print(f"  - [{issue.severity.upper()}] {issue.table}{col_info}: {issue.message}")
        print(f"\nReport written to: {report_path}")
        return 1

    except DatasetError as exc:
        print(f"ERROR: Validation failure — {exc}", file=sys.stderr)
        return 2
    except Exception as exc:
        # Top-level CLI boundary: turn anything unexpected into exit code 2.
        print(f"ERROR: Unexpected failure during validation — {exc}", file=sys.stderr)
        return 2


# Script entry point: use main()'s return value as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())