File size: 2,963 Bytes
9ec4919
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
{
  "name": "Data quality",
  "objective": "Keep a recurring dataset or knowledge base trustworthy by validating each refresh against explicit quality rules before it is published.",
  "trigger": {
    "type": "event",
    "cadence_or_event": "After each dataset or knowledge-base refresh, or nightly; also when the source schema changes or a freshness SLA is breached."
  },
  "intake": {
    "sources": ["new dataset version", "schema and row counts", "prior accepted version", "quality rule definitions"],
    "selection_rule": "Validate the latest version against the data contract and quality rules; quarantine on any hard-rule failure."
  },
  "workspace": {
    "isolation": "Read access to the dataset, schema, profiling tools, and prior versions; quarantine area separate from production.",
    "allowed_actions": ["run validation queries", "profile the data", "write a quality report", "quarantine a failing version"],
    "disallowed_actions": ["edit source data", "publish to production", "relax rules without review"]
  },
  "context": {
    "required_files": ["data contract or schema", "quality-rule definitions", "ownership map"],
    "runtime_sources": ["load metrics", "profiling output", "diff against the last accepted version"]
  },
  "agents": [
    {
      "role": "Profiler",
      "responsibility": "Compute counts, distributions, null rates, and freshness for the new version."
    },
    {
      "role": "Validator",
      "responsibility": "Check the profile against quality rules and the prior accepted baseline."
    },
    {
      "role": "Investigator",
      "responsibility": "Classify each failure as a defect, an accepted shift, or a rule bug."
    },
    {
      "role": "Reporter",
      "responsibility": "Record results, quarantine bad versions, and propose rule fixes."
    }
  ],
  "verification": {
    "gates": ["hard rules pass before a version is promotable", "distribution shifts beyond threshold are flagged with the delta", "every quarantine cites the failing rule and a sample", "accepted exceptions are recorded in the accepted-exception ledger"],
    "receipts": ["dataset version", "profile metrics", "rule results", "quarantine status with samples"]
  },
  "state": {
    "artifacts": ["quality report", "accepted-exception ledger"],
    "update_rule": "Persist the profile, rule results, and any new accepted exception so recurring shifts are not re-investigated."
  },
  "budget": {
    "max_retries": 2,
    "max_runtime_minutes": 90
  },
  "escalation": {
    "conditions": ["ambiguous distribution shift", "schema change needing a data-contract update", "repeated upstream failure", "regulated or PII-bearing fields", "a rule that may itself be wrong"],
    "destination": "Dataset owner via the data-quality channel"
  },
  "exit": {
    "success": "The version passes all hard rules and is ready for promotion.",
    "stop_without_success": "The version is quarantined with evidence, or an ambiguous shift is escalated to the dataset owner for judgment."
  }
}