jansahayak / rag /scheme_vectorstore.py
Anmol4521's picture
Upload 95 files
388aa42 verified
"""
Scheme Vectorstore Module
Builds and loads FAISS vectorstore for government schemes
"""
import os
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import PyPDFLoader
from rag.embeddings import get_embeddings
def build_scheme_vectorstore():
"""
Reads all PDFs from data/schemes_pdfs/ and builds FAISS index
Run this once to initialize the vectorstore
"""
documents = []
folder = "data/schemes_pdfs"
if not os.path.exists(folder):
os.makedirs(folder)
print(f"Created {folder}. Please add scheme PDFs to this folder.")
return
pdf_files = [f for f in os.listdir(folder) if f.endswith(".pdf")]
if not pdf_files:
print(f"No PDF files found in {folder}. Please add scheme PDFs.")
return
for file in pdf_files:
print(f"Processing {file}...")
loader = PyPDFLoader(os.path.join(folder, file))
documents.extend(loader.load())
if not documents:
print("No documents extracted. Check PDF files.")
return
print(f"Loaded {len(documents)} document chunks. Building vectorstore...")
embeddings = get_embeddings()
vectorstore = FAISS.from_documents(documents, embeddings)
os.makedirs("rag/scheme_index", exist_ok=True)
vectorstore.save_local("rag/scheme_index")
print("Scheme vectorstore built successfully!")
def load_scheme_vectorstore():
"""
Loads pre-built scheme vectorstore
Returns FAISS vectorstore instance
Raises:
FileNotFoundError: If vectorstore files don't exist
RuntimeError: If embeddings fail to load
"""
if not os.path.exists("rag/scheme_index/index.faiss"):
raise FileNotFoundError(
"Scheme vectorstore not found at rag/scheme_index/index.faiss. "
"Run 'python init_embeddings.py' or build_scheme_vectorstore() first."
)
print("πŸ“‚ Loading scheme vectorstore...")
embeddings = get_embeddings()
vectorstore = FAISS.load_local("rag/scheme_index", embeddings, allow_dangerous_deserialization=True)
print("βœ… Scheme vectorstore loaded successfully")
return vectorstore