{ "name": "Data quality", "objective": "Keep a recurring dataset or knowledge base trustworthy by validating each refresh against explicit quality rules before it is published.", "trigger": { "type": "event", "cadence_or_event": "After each dataset or knowledge-base refresh, or nightly; also when the source schema changes or a freshness SLA is breached." }, "intake": { "sources": ["new dataset version", "schema and row counts", "prior accepted version", "quality rule definitions"], "selection_rule": "Validate the latest version against the data contract and quality rules; quarantine on any hard-rule failure." }, "workspace": { "isolation": "Read-only access to the dataset, schema, profiling tools, and prior versions; failing versions are moved to a quarantine area kept separate from production.", "allowed_actions": ["run validation queries", "profile the data", "write a quality report", "quarantine a failing version"], "disallowed_actions": ["edit source data", "publish to production", "relax rules without review"] }, "context": { "required_files": ["data contract or schema", "quality-rule definitions", "ownership map"], "runtime_sources": ["load metrics", "profiling output", "diff against the last accepted version"] }, "agents": [ { "role": "Profiler", "responsibility": "Compute counts, distributions, null rates, and freshness for the new version." }, { "role": "Validator", "responsibility": "Check the profile against quality rules and the prior accepted baseline." }, { "role": "Investigator", "responsibility": "Classify each failure as a defect, an accepted shift, or a rule bug." }, { "role": "Reporter", "responsibility": "Record results, quarantine bad versions, and propose rule fixes." 
} ], "verification": { "gates": ["hard rules pass before a version is promotable", "distribution shifts beyond threshold are flagged with the delta", "every quarantine cites the failing rule and a sample", "accepted exceptions are recorded"], "receipts": ["dataset version", "profile metrics", "rule results", "quarantine status with samples"] }, "state": { "artifacts": ["quality report", "accepted-exception ledger"], "update_rule": "Persist the profile, rule results, and any new accepted exception so recurring shifts are not re-investigated." }, "budget": { "max_retries": 2, "max_runtime_minutes": 90 }, "escalation": { "conditions": ["ambiguous distribution shift", "schema change needing a data-contract update", "repeated upstream failure", "regulated or PII-bearing fields", "a rule that may itself be wrong"], "destination": "Dataset owner via the data-quality channel" }, "exit": { "success": "The version passes all hard rules and is ready for promotion.", "stop_without_success": "The version is quarantined with evidence, or a shift needs owner judgment." } }