# BPL-RAG-Spring-2026 / config.py
# han-na's picture
# human in the loop and other changes
# 6910834
"""
Central configuration for the BPL RAG pipeline.
All tunable constants live here — change a value here and it applies everywhere.
"""
import os
from urllib.parse import quote

from dotenv import load_dotenv
# Load entries from a .env file into the process environment. This runs before
# any of the os.getenv() lookups below so that they can see those values.
load_dotenv()
# ── OpenAI ────────────────────────────────────────────────────────────────────
# The key is taken from the environment; it is None when the variable is unset.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
OPENAI_CHAT_MODEL: str = "gpt-4o"
# OPENAI_EMBED_MODEL = "text-embedding-3-small" # fallback if not using BGE
# ── BGE Embedding ─────────────────────────────────────────────────────────────
BGE_MODEL_NAME: str = "BAAI/bge-m3"
# Pinned to CPU: the V100 (CC 7.0) is incompatible with the installed PyTorch,
# which requires CC >= 7.5.
BGE_DEVICE: str = "cpu"
# Lower this value if embedding runs out of memory.
BGE_BATCH_SIZE: int = 32
# ── Neo4j / GraphRAG ──────────────────────────────────────────────────────────
# Connection settings are read from the environment; the second argument of
# each lookup is the value used when the variable is not set.
NEO4J_URI = os.environ.get("NEO4J_URI", "")
NEO4J_USER = os.environ.get("NEO4J_USER", "neo4j")
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD", "")

# GraphRAG is triggered only for content_driven queries.
GRAPH_RAG_ENABLED: bool = True
# Upper bound on the number of additional docs contributed by the graph.
GRAPH_TOP_K: int = 5_000
# Minimum number of query entities a doc must match.
GRAPH_MIN_ENTITY_MATCHES: int = 1
# ── PostgreSQL / pgVector ─────────────────────────────────────────────────────
# Connection settings are read from the environment; each lookup falls back to
# the default shown when the variable is not set.
PG_HOST = os.getenv("PG_HOST", "localhost")
# `or` instead of a getenv default: a blank `PG_PORT=` entry yields "", which
# int() would reject with ValueError.
PG_PORT = int(os.getenv("PG_PORT") or 5432)
PG_DB = os.getenv("PG_DB", "bpl_rag")
PG_USER = os.getenv("PG_USER", "postgres")
PG_PASSWORD = os.getenv("PG_PASSWORD", "")
# Connection URI assembled from the values above. The user name and password
# are percent-encoded so that characters such as "@", ":", "/" or "%" in them
# cannot be mistaken for URI delimiters. PG_USER / PG_PASSWORD themselves stay
# raw for callers that pass them as separate connection arguments.
PG_DSN = (
    f"postgresql://{quote(PG_USER, safe='')}:{quote(PG_PASSWORD, safe='')}"
    f"@{PG_HOST}:{PG_PORT}/{PG_DB}"
)
# ── Chunking ──────────────────────────────────────────────────────────────────
# Was 512 — BGE-M3 handles longer context well.
CHUNK_SIZE: int = 1024
# Was 100 — proportionally larger overlap.
CHUNK_OVERLAP: int = 150
# tiktoken encoding name.
CHUNK_TOKENIZER: str = "cl100k_base"
# ── Retrieval ─────────────────────────────────────────────────────────────────
# Candidates taken from vector search before rerank.
TOP_K_DENSE: int = 5_000
# Candidates taken from BM25.
TOP_K_BM25: int = 5_000
# Number of results returned to the user.
TOP_K_FINAL: int = 50
# RRF constant (standard is 60).
RRF_K: int = 60
# ── Metadata score blend weight ───────────────────────────────────────────────
# The two weights below are combined as:
#   final_score = CONTENT_WEIGHT * content_rrf + METADATA_WEIGHT * metadata_sim
CONTENT_WEIGHT: float = 0.8
METADATA_WEIGHT: float = 0.2
# ── Ingestion ─────────────────────────────────────────────────────────────────
# Records whose raw_text has fewer characters than this are skipped.
MIN_CHAR_COUNT: int = 100
# Folder containing the local JSON dumps.
JSON_DUMP_DIR: str = "data/raw"
# ── Generation ───────────────────────────────────────────────────────────────
# How many chunks are passed to GPT-4o.
MAX_CONTEXT_CHUNKS: int = 5
GENERATION_MAX_TOKENS: int = 600
# Documents scoring below this are considered irrelevant.
MIN_RELEVANCE_SCORE: float = 0.1