climate-risk-engine / scripts /build_rag_index.py
jtlevine's picture
Add LSTM neural model, ERA5 data, FAISS+BM25 RAG, Neon DB, eval suite; de-jargon frontend
f2b0895
raw
history blame contribute delete
662 Bytes
#!/usr/bin/env python3
"""Build the FAISS + BM25 RAG index from the knowledge base.

Usage:
    python3 scripts/build_rag_index.py

Run this before starting the app, or include it in the Dockerfile.
"""
import logging
import sys
from pathlib import Path
# Ensure project root is on sys.path so the ``src`` package can be imported
# when this file is executed directly (e.g. ``python3 scripts/build_rag_index.py``).
project_root = Path(__file__).resolve().parents[1]
sys.path.insert(0, str(project_root))


def main() -> None:
    """Rebuild the RAG index and print a one-line summary.

    Configures INFO-level logging, calls ``build_index(force_rebuild=True)``
    from ``src.explanation.rag_index_builder``, and prints the number of
    documents in the returned corpus and the ``ntotal`` vector count of the
    returned index.

    Any exception raised by the import or by ``build_index`` propagates, so
    the script exits with a traceback and a non-zero status on failure.
    """
    # Configure logging before importing the builder, preserving the original
    # statement order (basicConfig first, project import second).
    logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")

    # Deferred import: it relies on the sys.path entry added above, and keeping
    # it here means merely importing this module does not load the builder.
    from src.explanation.rag_index_builder import build_index

    # build_index returns three values; the third (the BM25 object) is not
    # needed for the summary line.
    # NOTE(review): force_rebuild=True presumably bypasses any existing
    # on-disk index — confirm against rag_index_builder.
    index, corpus, _bm25 = build_index(force_rebuild=True)
    print(f"RAG index built successfully: {len(corpus)} documents, {index.ntotal} vectors")


# Only build when run as a script; importing this module must not trigger a
# (potentially expensive) forced rebuild.
if __name__ == "__main__":
    main()