| from langchain_community.document_loaders import PyPDFLoader | |
| from langchain_text_splitters import RecursiveCharacterTextSplitter | |
| from langchain_community.vectorstores import FAISS | |
| from src.config.config import Config | |
class DocumentProcessor:
    """Build a FAISS vector store from the text of one or more PDF files."""

    def __init__(self, embeddings):
        """Store the embeddings object used when building the vector store.

        Args:
            embeddings: Object handed unchanged to ``FAISS.from_documents``
                (presumably a LangChain embeddings model -- confirm with
                the caller).
        """
        self.embeddings = embeddings

    def process_pdfs(self, pdf_paths):
        """Load the given PDFs, split them into chunks and index the chunks.

        Each path is read with ``PyPDFLoader``; the loaded documents are
        split by a ``RecursiveCharacterTextSplitter`` configured from
        ``Config.CHUNK_SIZE`` and ``Config.CHUNK_OVERLAP``; the resulting
        chunks are passed to ``FAISS.from_documents`` together with
        ``self.embeddings``.

        Args:
            pdf_paths: Iterable of PDF file paths.

        Returns:
            The vector store returned by ``FAISS.from_documents``.

        Raises:
            RuntimeError: If any step fails, including the case where the
                input produces no pages or no chunks. The underlying
                exception is available as ``__cause__``.
        """
        try:
            documents = []
            for path in pdf_paths:
                documents.extend(PyPDFLoader(path).load())

            # Fail early with a clear message instead of passing an empty
            # list down to the index builder.
            if not documents:
                raise ValueError(
                    "no pages were loaded from the provided PDF paths"
                )

            splitter = RecursiveCharacterTextSplitter(
                chunk_size=Config.CHUNK_SIZE,
                chunk_overlap=Config.CHUNK_OVERLAP,
            )
            splits = splitter.split_documents(documents)

            # Pages were loaded but produced no chunks (e.g. no extractable
            # text); there is nothing to index.
            if not splits:
                raise ValueError(
                    "no text chunks were produced from the loaded PDFs"
                )

            return FAISS.from_documents(splits, self.embeddings)
        except Exception as e:
            # Keep the public contract (RuntimeError with this prefix) but
            # chain the original exception so its traceback is preserved.
            raise RuntimeError(f"Document processing failed: {e}") from e