| #!/usr/bin/env python3 | |
| from __future__ import annotations | |
| import sys | |
| from pathlib import Path | |
| import pandas as pd | |
| ROOT = Path(__file__).resolve().parents[1] | |
| SRC = ROOT / "src" | |
| if str(SRC) not in sys.path: | |
| sys.path.insert(0, str(SRC)) | |
| from dip_client import KB_COLUMNS | |
| path = ROOT / "data" / "dip_knowledge_base.csv" | |
| if not path.exists(): | |
| raise SystemExit("No data/dip_knowledge_base.csv found. Build the KB first.") | |
| df = pd.read_csv(path, dtype=str).fillna("") | |
| missing = [c for c in KB_COLUMNS if c not in df.columns] | |
| if missing: | |
| raise SystemExit(f"Missing columns: {missing}") | |
| if df.duplicated(subset=["resource_type", "dip_id"]).any(): | |
| raise SystemExit("Duplicate resource_type + dip_id rows found.") | |
| print(f"OK: {len(df)} rows, {len(df.columns)} columns.") | |