from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_text_splitters import RecursiveCharacterTextSplitter

from src.config.config import Config


class DocumentProcessor:
    """Load PDF files, split them into chunks, and index the chunks in FAISS."""

    def __init__(self, embeddings):
        """Store the embeddings object later handed to ``FAISS.from_documents``.

        Args:
            embeddings: Embedding backend passed unchanged to the FAISS
                vector store when the index is built.
        """
        self.embeddings = embeddings

    def process_pdfs(self, pdf_paths):
        """Build a FAISS vector store from the given PDF files.

        Every path is loaded with ``PyPDFLoader``; the loaded documents are
        split with ``RecursiveCharacterTextSplitter`` using
        ``Config.CHUNK_SIZE`` and ``Config.CHUNK_OVERLAP``, and the resulting
        chunks are indexed with ``FAISS.from_documents``.

        Args:
            pdf_paths: Iterable of PDF file paths to load.

        Returns:
            The vector store returned by ``FAISS.from_documents``.

        Raises:
            RuntimeError: If loading, splitting, or indexing fails, or if no
                chunks were produced from the inputs. When a lower-level
                exception triggered the failure it is attached as
                ``__cause__``.
        """
        try:
            documents = []
            for path in pdf_paths:
                documents.extend(PyPDFLoader(path).load())

            splitter = RecursiveCharacterTextSplitter(
                chunk_size=Config.CHUNK_SIZE,
                chunk_overlap=Config.CHUNK_OVERLAP,
            )
            splits = splitter.split_documents(documents)
        except Exception as e:
            # Keep the RuntimeError contract callers rely on, but chain the
            # original exception so its traceback is preserved explicitly.
            raise RuntimeError(f"Document processing failed: {e}") from e

        if not splits:
            # Building an index from zero chunks would fail inside the vector
            # store with an opaque error; report the real condition instead.
            raise RuntimeError(
                "Document processing failed: no text chunks were produced "
                "from the provided PDFs"
            )

        try:
            return FAISS.from_documents(splits, self.embeddings)
        except Exception as e:
            raise RuntimeError(f"Document processing failed: {e}") from e