Spaces:
Sleeping
Sleeping
File size: 2,104 Bytes
"""CLI subcommand: ``dataforge profile <path> [--schema <yaml>]``.

Reads a CSV file, runs all detectors, and renders detected issues as a
rich-formatted terminal table. Exit code 0 if no UNSAFE issues are found,
1 if any UNSAFE issue is found, and 2 if the CSV file cannot be read.
"""
from __future__ import annotations
from pathlib import Path
from typing import Annotated
import typer
from rich.console import Console
from dataforge.cli.common import load_schema, read_csv
from dataforge.detectors import run_all_detectors
from dataforge.detectors.base import Schema, Severity
from dataforge.ui.profile_view import render_profile_table
# Module-level console bound to stderr; the command prints its error
# messages through it.
_console = Console(stderr=True)
def profile(
    path: Annotated[
        Path,
        typer.Argument(
            exists=True,
            readable=True,
            help="Path to the CSV file to profile.",
        ),
    ],
    schema: Annotated[
        Path | None,
        typer.Option(
            "--schema",
            exists=True,
            readable=True,
            help="Path to a YAML schema file with column types and FDs.",
        ),
    ] = None,
) -> None:
    """Profile a CSV file for data-quality issues.

    Reads the CSV, runs all detectors (type_mismatch, decimal_shift,
    fd_violation), and renders a rich-formatted table of detected issues.

    Args:
        path: CSV file to profile.
        schema: Optional YAML schema file. When omitted, the detectors are
            run with ``None`` as the schema.

    Raises:
        typer.Exit: With code 1 if any detected issue has UNSAFE severity,
            or with code 2 if the CSV or the schema file cannot be loaded.
            The function returns normally (exit code 0) otherwise.
    """
    # Imported locally: only the error paths need it. Exception text must be
    # escaped before being embedded in a Rich markup string, otherwise
    # bracketed fragments in the message are parsed as markup tags.
    from rich.markup import escape

    # Per the original author's note, read_csv loads with dtype=str to avoid
    # pandas type-coercion artifacts.
    try:
        df = read_csv(path)
    except Exception as exc:
        _console.print(
            f"[bold red]Error reading CSV:[/bold red] {escape(str(exc))}"
        )
        raise typer.Exit(code=2) from exc

    # Optionally load the schema. Failures are reported the same way as CSV
    # failures so that exit code 1 stays reserved for "UNSAFE issues found".
    parsed_schema: Schema | None = None
    if schema is not None:
        try:
            parsed_schema = load_schema(schema)
        except typer.Exit:
            # If the loader requests an exit on its own, honour it unchanged.
            raise
        except Exception as exc:
            _console.print(
                f"[bold red]Error reading schema:[/bold red] {escape(str(exc))}"
            )
            raise typer.Exit(code=2) from exc

    # Run all detectors.
    issues = run_all_detectors(df, parsed_schema)

    # Render the results on a default Console (separate from the stderr
    # console used for error messages).
    output_console = Console()
    render_profile_table(issues, output_console, file_path=str(path))

    # Signal UNSAFE findings to the caller through the exit status.
    if any(issue.severity == Severity.UNSAFE for issue in issues):
        raise typer.Exit(code=1)
|