| |
"""Build the local DIP knowledge base for the German Promise Tracker.

Example:
    export DIP_API_KEY="..."
    python scripts/build_dip_knowledge_base.py \
        --resources vorgang vorgangsposition drucksache \
        --wahlperiode 21 \
        --date-start 2025-01-01 \
        --max-pages 5
"""
|
|
| from __future__ import annotations |
|
|
| import argparse |
| import os |
| import sys |
| from pathlib import Path |
|
|
# Directory two levels above this file (the parent of the directory that
# contains the script).
ROOT = Path(__file__).resolve().parent.parent
SRC = ROOT.joinpath("src")

# Put ROOT/src at the front of the import search path, once, so the
# ``dip_client`` import below can be resolved from there.
if str(SRC) not in sys.path:
    sys.path.insert(0, str(SRC))
|
|
| from dip_client import RESOURCE_TYPES, build_knowledge_base, build_query_params, save_knowledge_base |
|
|
|
|
def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Parse the command-line options of the knowledge-base build script.

    Args:
        argv: Argument strings to parse. ``None`` (the default) lets argparse
            read the process command line, which is what the previous
            zero-argument form always did; passing a list makes the parser
            usable from tests or other Python code.

    Returns:
        Namespace with the attributes ``api_key``, ``resources``,
        ``wahlperiode``, ``date_start``, ``date_end``, ``updated_start``,
        ``updated_end``, ``zuordnung``, ``max_pages`` and ``output_dir``.
        String filters that were not supplied are left as ``""``.
    """
    parser = argparse.ArgumentParser(
        description="Fetch Bundestag DIP records and build a normalised CSV knowledge base.",
    )
    # The key falls back to the environment so it need not appear on the
    # command line.
    parser.add_argument(
        "--api-key",
        default=os.environ.get("DIP_API_KEY", ""),
        help="DIP API key. Defaults to env var DIP_API_KEY.",
    )
    parser.add_argument(
        "--resources",
        nargs="+",
        default=["vorgang", "vorgangsposition", "drucksache"],
        choices=RESOURCE_TYPES,
        help="DIP resource types to fetch.",
    )
    parser.add_argument(
        "--wahlperiode",
        type=int,
        default=21,
        help="Bundestag election period, e.g. 21.",
    )
    parser.add_argument("--date-start", default="", help="DIP f.datum.start, e.g. 2025-01-01.")
    parser.add_argument("--date-end", default="", help="DIP f.datum.end, e.g. 2026-12-31.")
    parser.add_argument(
        "--updated-start",
        default="",
        help="DIP f.aktualisiert.start, e.g. 2026-05-01T00:00:00+02:00.",
    )
    parser.add_argument("--updated-end", default="", help="DIP f.aktualisiert.end.")
    # "" is listed as a choice so that the "no filter" default is itself valid.
    parser.add_argument(
        "--zuordnung",
        default="",
        choices=["", "BT", "BR", "BV", "EK"],
        help="DIP f.zuordnung filter.",
    )
    parser.add_argument("--max-pages", type=int, default=2, help="Cursor pages per resource.")
    parser.add_argument("--output-dir", default=str(ROOT / "data"), help="Output directory.")
    return parser.parse_args(argv)
|
|
|
|
def main() -> None:
    """Run the build: parse options, fetch records, save them, report.

    The parsed options are turned into query parameters via
    ``build_query_params``; string filters left empty on the command line
    are passed as ``None``. The result of ``build_knowledge_base`` is handed
    to ``save_knowledge_base`` together with the output directory, then the
    record count and the metadata are printed.

    Raises:
        SystemExit: If no API key was supplied via ``--api-key`` or the
            ``DIP_API_KEY`` environment variable.
    """
    cli = parse_args()
    api_key = cli.api_key
    if not api_key:
        raise SystemExit("Missing API key. Set DIP_API_KEY or pass --api-key.")

    # Optional filters default to "" in the parser; map empty values to None.
    optional_filters = {
        "date_start": cli.date_start,
        "date_end": cli.date_end,
        "updated_start": cli.updated_start,
        "updated_end": cli.updated_end,
        "zuordnung": cli.zuordnung,
    }
    query = build_query_params(
        wahlperiode=cli.wahlperiode,
        **{name: value or None for name, value in optional_filters.items()},
    )

    frame, documents, meta = build_knowledge_base(
        api_key=api_key,
        resources=cli.resources,
        params=query,
        max_pages_per_resource=cli.max_pages,
    )

    target = cli.output_dir
    save_knowledge_base(frame, documents, meta, Path(target))
    print(f"Saved {len(frame)} unique DIP records to {target}")
    print(meta)
|
|
|
|
# Run the build only when this file is executed as a script, so importing
# the module has no such side effect.
if __name__ == "__main__":
    main()
|
|