Spaces:
Sleeping
Sleeping
| from langchain_community.document_loaders import PyPDFLoader | |
| from langchain_community.vectorstores import FAISS | |
| from langchain_huggingface import HuggingFaceEmbeddings | |
| from langchain_text_splitters import RecursiveCharacterTextSplitter | |
| from config import EMBEDDING_MODEL, FAISS_PATH, PDF_SOURCE | |
def make_vectorization():
    """Build a FAISS vector store from the configured PDF and save it locally.

    The PDF at ``PDF_SOURCE`` is loaded, split into overlapping text chunks,
    embedded with the Hugging Face model named by ``EMBEDDING_MODEL``, and
    indexed with FAISS. The index is written to ``FAISS_PATH`` before being
    returned.

    Returns:
        The FAISS vector store built from the chunked documents.
    """
    # The embedder is constructed first, before any document work is done.
    embedder = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)

    raw_docs = PyPDFLoader(PDF_SOURCE).load()

    # Chunks are capped at 1000 units as measured by ``len``, with 100 units
    # of overlap between consecutive chunks.
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=100,
        length_function=len,
        is_separator_regex=False,
    )
    chunks = splitter.split_documents(raw_docs)

    index = FAISS.from_documents(chunks, embedder)
    index.save_local(FAISS_PATH)
    return index