| import argparse, csv, json |
| from pathlib import Path |
| try: |
| import yaml |
| except Exception: |
| yaml = None |
|
|
def _clean_scalar(value):
    """Return *value* without an inline ``# comment`` or one pair of quotes."""
    value = value.strip()
    if value[:1] in ("'", '"'):
        closing = value.find(value[0], 1)
        if closing != -1:
            return value[1:closing]
    return value.split(" #", 1)[0].strip()


def _parse_contract_fallback(text):
    """Extract the contract keys the validator reads, without PyYAML.

    Only block-style YAML is understood: a ``required_fields`` list (block
    items or an inline ``[a, b]`` list) and a ``completeness`` number nested
    under ``quality_thresholds``.  Everything else is ignored.
    """
    required = []
    completeness = 0.0
    section = None  # most recent ``key:`` line that opened a nested block
    for raw_line in text.splitlines():
        line = raw_line.strip()
        if not line or line.startswith("#"):
            continue
        if line == "-" or line.startswith("- "):
            # List items under any other key are not required fields.
            if section == "required_fields":
                item = _clean_scalar(line[1:])
                if item:
                    required.append(item)
            continue
        key, sep, value = line.partition(":")
        if not sep:
            continue
        key = key.strip().strip("'\"")
        value = _clean_scalar(value)
        if not value:
            section = key
            continue
        if not raw_line[:1].isspace():
            # A top-level ``key: value`` pair closes whatever block was open.
            section = None
        if key == "required_fields" and value.startswith("[") and value.endswith("]"):
            required.extend(
                item for item in map(_clean_scalar, value[1:-1].split(",")) if item
            )
        elif key == "completeness" and section == "quality_thresholds":
            try:
                completeness = float(value)
            except ValueError:
                raise ValueError(
                    "quality_thresholds.completeness is not a number: {!r}".format(value)
                ) from None
    return {
        "required_fields": required,
        "quality_thresholds": {"completeness": completeness},
    }


def load_contract(path):
    """Load a data contract file and return it as a dict.

    If the module-level ``yaml`` name is truthy (PyYAML was imported), the
    file is parsed with ``yaml.safe_load``.  Otherwise a small line-based
    fallback extracts ``required_fields`` and
    ``quality_thresholds.completeness`` only.

    Args:
        path: Location of the contract file; it is read as UTF-8.

    Returns:
        The contract as a dict.  An empty YAML document yields ``{}``.  The
        fallback always returns both ``required_fields`` and
        ``quality_thresholds`` (with ``completeness`` defaulting to 0.0).

    Raises:
        ValueError: If the parsed YAML document is not a mapping, or if the
            fallback finds a non-numeric completeness threshold.
    """
    text = Path(path).read_text(encoding="utf-8")
    if yaml:
        loaded = yaml.safe_load(text)
        if loaded is None:
            # safe_load returns None for an empty document; callers use .get().
            return {}
        if not isinstance(loaded, dict):
            raise ValueError(
                "contract {} must be a mapping at the top level, got {}".format(
                    path, type(loaded).__name__
                )
            )
        return loaded
    return _parse_contract_fallback(text)
|
|
def validate(contract_path, file_path):
    """Validate a CSV file against a data contract and score its quality.

    The contract is loaded with ``load_contract``.  Its ``required_fields``
    list is checked against the CSV header, and the share of non-empty
    required cells is compared with ``quality_thresholds.completeness``.

    Args:
        contract_path: Path of the contract file.
        file_path: Path of the CSV file; it is read as UTF-8 and a leading
            byte-order mark is ignored.

    Returns:
        A dict with these keys:
            valid: True when no required field is missing from the header
                and completeness meets the threshold.
            row_count: Number of data rows.
            missing_fields: Required fields absent from the header.
            completeness: Fraction of required cells that are neither empty
                nor absent, rounded to 4 places (1.0 when there are no
                rows or no required fields).
            schema_validity: 1.0 when ``missing_fields`` is empty, else 0.0.
            quality_score: ``0.7 * completeness + 0.3 * schema_validity``,
                rounded to 4 places.
    """
    contract = load_contract(contract_path) or {}
    # A key present with a null value must behave like an absent key.
    required = contract.get("required_fields") or []
    thresholds = contract.get("quality_thresholds") or {}
    threshold = thresholds.get("completeness")
    if threshold is None:
        threshold = 0.0

    # newline="" lets the csv module handle embedded line breaks itself;
    # utf-8-sig keeps a BOM from being glued onto the first column name.
    with open(file_path, encoding="utf-8-sig", newline="") as handle:
        reader = csv.DictReader(handle)
        rows = list(reader)
        # Read the header from the reader (while the file is still open) so
        # a header-only file still exposes its columns.
        header = reader.fieldnames or []

    missing_fields = [name for name in required if name not in header]
    # max(1, ...) avoids dividing by zero when there are no rows or fields.
    total_required = max(1, len(rows) * len(required))
    missing_values = sum(
        1
        for row in rows
        for name in required
        if row.get(name, "") in ("", None)
    )
    completeness = 1.0 - missing_values / total_required
    schema_validity = 1.0 if not missing_fields else 0.0
    return {
        "valid": not missing_fields and completeness >= threshold,
        "row_count": len(rows),
        "missing_fields": missing_fields,
        "completeness": round(completeness, 4),
        "schema_validity": schema_validity,
        "quality_score": round(0.7 * completeness + 0.3 * schema_validity, 4),
    }
|
|
if __name__ == "__main__":
    # Command-line entry point: validate --file against --contract and print
    # the resulting report as indented JSON.
    cli = argparse.ArgumentParser()
    for flag in ("--contract", "--file"):
        cli.add_argument(flag, required=True)
    options = cli.parse_args()
    report = validate(options.contract, options.file)
    print(json.dumps(report, indent=2))
|
|