#!/usr/bin/env python3
r"""Build the local DIP knowledge base for the German Promise Tracker.

Example:
    export DIP_API_KEY="..."
    python scripts/build_dip_knowledge_base.py \
        --resources vorgang vorgangsposition drucksache \
        --wahlperiode 21 \
        --date-start 2025-01-01 \
        --max-pages 5
"""
from __future__ import annotations

import argparse
import os
import sys
from collections.abc import Sequence
from pathlib import Path

# Make the sibling ``src`` directory importable when this file is run directly
# as a script, without requiring the project to be installed.
ROOT = Path(__file__).resolve().parents[1]
SRC = ROOT / "src"
if str(SRC) not in sys.path:
    sys.path.insert(0, str(SRC))

# Must come after the sys.path adjustment above.
from dip_client import (  # noqa: E402
    RESOURCE_TYPES,
    build_knowledge_base,
    build_query_params,
    save_knowledge_base,
)


def parse_args(argv: Sequence[str] | None = None) -> argparse.Namespace:
    """Parse the command-line options of the knowledge-base builder.

    Args:
        argv: Argument list to parse. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``, which is the behaviour of a plain
            ``parse_args()`` call.

    Returns:
        The populated namespace. String filters that were not supplied are
        empty strings, and ``api_key`` falls back to the ``DIP_API_KEY``
        environment variable (empty string when unset).
    """
    parser = argparse.ArgumentParser(
        description="Fetch Bundestag DIP records and build a normalised CSV knowledge base.",
    )
    parser.add_argument(
        "--api-key",
        default=os.environ.get("DIP_API_KEY", ""),
        help="DIP API key. Defaults to env var DIP_API_KEY.",
    )
    parser.add_argument(
        "--resources",
        nargs="+",
        default=["vorgang", "vorgangsposition", "drucksache"],
        choices=RESOURCE_TYPES,
    )
    parser.add_argument(
        "--wahlperiode",
        type=int,
        default=21,
        help="Bundestag election period, e.g. 21.",
    )
    parser.add_argument(
        "--date-start",
        default="",
        help="DIP f.datum.start, e.g. 2025-01-01.",
    )
    parser.add_argument(
        "--date-end",
        default="",
        help="DIP f.datum.end, e.g. 2026-12-31.",
    )
    parser.add_argument(
        "--updated-start",
        default="",
        help="DIP f.aktualisiert.start, e.g. 2026-05-01T00:00:00+02:00.",
    )
    parser.add_argument(
        "--updated-end",
        default="",
        help="DIP f.aktualisiert.end.",
    )
    parser.add_argument(
        "--zuordnung",
        default="",
        choices=["", "BT", "BR", "BV", "EK"],
        help="DIP f.zuordnung filter.",
    )
    parser.add_argument(
        "--max-pages",
        type=int,
        default=2,
        help="Cursor pages per resource.",
    )
    parser.add_argument(
        "--output-dir",
        default=str(ROOT / "data"),
        help="Output directory.",
    )
    return parser.parse_args(argv)


def main(argv: Sequence[str] | None = None) -> None:
    """Run the build: parse options, fetch the records, save them, report.

    Args:
        argv: Argument list forwarded to :func:`parse_args`. ``None`` (the
            default) uses the process command line.

    Raises:
        SystemExit: If no API key was given via ``--api-key`` or the
            ``DIP_API_KEY`` environment variable.
    """
    args = parse_args(argv)
    if not args.api_key:
        raise SystemExit("Missing API key. Set DIP_API_KEY or pass --api-key.")

    # The CLI uses "" for "filter not given"; hand those on as None.
    params = build_query_params(
        wahlperiode=args.wahlperiode,
        date_start=args.date_start or None,
        date_end=args.date_end or None,
        updated_start=args.updated_start or None,
        updated_end=args.updated_end or None,
        zuordnung=args.zuordnung or None,
    )
    df, raw_docs, metadata = build_knowledge_base(
        api_key=args.api_key,
        resources=args.resources,
        params=params,
        max_pages_per_resource=args.max_pages,
    )
    save_knowledge_base(df, raw_docs, metadata, Path(args.output_dir))

    print(f"Saved {len(df)} unique DIP records to {args.output_dir}")
    print(metadata)


if __name__ == "__main__":
    main()