|
|
|
|
|
""" |
|
|
Builds/refreshes the local RAG KB (data/kb.jsonl) from GitHub + local docs.

Usage:
    python scripts/build_kb.py --config configs/rag_sources.yaml --out data/kb.jsonl

    # --force deletes the existing output file first, then rebuilds:
    python scripts/build_kb.py --config configs/rag_sources.yaml --out data/kb.jsonl --force
|
|
""" |
|
|
|
|
|
from __future__ import annotations |
|
|
import argparse |
|
|
import logging |
|
|
import os |
|
|
import sys |
|
|
from pathlib import Path |
|
|
|
|
|
|
|
|
# Two directory levels above this script file; presumably the project root
# that holds the `app` package imported further down.
ROOT = Path(__file__).resolve().parent.parent
# Prepend it so it takes precedence over any other entry on the import path.
sys.path.insert(0, str(ROOT))

# Configure root logging (INFO, "LEVEL message" format) and grab the
# script-level logger used by the rest of this module.
logging.basicConfig(level=logging.INFO, format="%(levelname)s %(message)s")
logger = logging.getLogger("build_kb")
|
|
|
|
|
|
|
|
# The builder lives in the project package, so the import can only succeed
# when ROOT is importable. Any failure here is fatal for the script: log the
# cause plus a hint and exit with status 2.
try:
    from app.core.rag.build import build_kb_from_config, ensure_kb
except Exception as import_err:
    logger.error("Failed importing KB builder from app.core.rag.build: %s", import_err)
    logger.error("Make sure you're running from the project root and PYTHONPATH includes '.'.")
    raise SystemExit(2)
|
|
|
|
|
def main(argv: list[str] | None = None) -> int:
    """Parse the CLI options and (re)build the KB JSONL file.

    Args:
        argv: Argument list to parse instead of ``sys.argv[1:]``. ``None``
            (the default) keeps the original command-line behaviour, so
            existing ``main()`` callers are unaffected; passing a list makes
            the entry point callable from tests or other Python code.

    Returns:
        ``0`` once ``build_kb_from_config`` has returned.

    Raises:
        SystemExit: Raised by argparse for ``--help`` (code 0) or for
            missing/invalid options (code 2).

    Exceptions raised by the file removal or by the builder itself are not
    caught here and propagate to the caller.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--config", required=True, help="Path to configs/rag_sources.yaml")
    parser.add_argument("--out", required=True, help="Output JSONL file, e.g., data/kb.jsonl")
    parser.add_argument("--force", action="store_true", help="Delete output file first, then rebuild")
    args = parser.parse_args(argv)

    # --force: drop any previous output before the builder is invoked.
    out_path = Path(args.out)
    if args.force and out_path.exists():
        logger.info("Removing existing %s", out_path)
        out_path.unlink()

    # The builder's return value is reported as the number of records written.
    n = build_kb_from_config(config_path=args.config, out_jsonl=args.out)
    logger.info("Wrote %d records to %s", n, args.out)
    return 0
|
|
|
|
|
# Script entry point: run main() and use its return value as the exit status.
if __name__ == "__main__":
    sys.exit(main())
|
|
|