from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

from config import EMBEDDING_MODEL, FAISS_PATH, PDF_SOURCE


def make_vectorization(*, chunk_size: int = 1000, chunk_overlap: int = 100) -> FAISS:
    """Build a FAISS vector store from the configured PDF and save it to disk.

    The PDF at ``PDF_SOURCE`` is loaded, split into overlapping text chunks,
    embedded with the ``EMBEDDING_MODEL`` HuggingFace model, indexed with
    FAISS, and the resulting index is written to ``FAISS_PATH``.

    Args:
        chunk_size: Maximum chunk length passed to the text splitter; length
            is measured with ``len``, i.e. in characters.
        chunk_overlap: Overlap between consecutive chunks, passed to the
            text splitter.

    Returns:
        The FAISS vector store that was built and saved.

    Raises:
        ValueError: If splitting the loaded PDF produces no chunks.
    """
    embedding_model = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)

    pages = PyPDFLoader(PDF_SOURCE).load()

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
        is_separator_regex=False,
    )
    chunks = text_splitter.split_documents(pages)

    # An index cannot be built from zero chunks (e.g. a PDF with no
    # extractable text); fail early with a clear message instead of letting
    # the index construction fail with a low-level error.
    if not chunks:
        raise ValueError(
            f"No text chunks were produced from {PDF_SOURCE!r}; "
            "cannot build a FAISS index."
        )

    vectorstore = FAISS.from_documents(chunks, embedding_model)
    vectorstore.save_local(FAISS_PATH)
    return vectorstore