Spaces:
Sleeping
Sleeping
File size: 822 Bytes
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from config import EMBEDDING_MODEL, FAISS_PATH, PDF_SOURCE
def make_vectorization(*, chunk_size: int = 1000, chunk_overlap: int = 100) -> "FAISS":
    """Build a FAISS index from the configured PDF and save it to disk.

    Loads the PDF at ``PDF_SOURCE``, splits the loaded documents into
    overlapping chunks, embeds the chunks with the Hugging Face model named
    by ``EMBEDDING_MODEL``, and writes the resulting index to ``FAISS_PATH``.

    Args:
        chunk_size: Maximum chunk length, measured with ``len`` (i.e. in
            characters). Defaults to 1000.
        chunk_overlap: Length shared between consecutive chunks, measured
            the same way. Defaults to 100. It should be smaller than
            ``chunk_size``.

    Returns:
        The FAISS vector store that was built and saved.
    """
    embedding_model = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)

    pages = PyPDFLoader(PDF_SOURCE).load()

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
        is_separator_regex=False,
    )
    chunks = text_splitter.split_documents(pages)

    vectorstore = FAISS.from_documents(chunks, embedding_model)
    # Persist the index so it can be reloaded later without re-embedding.
    vectorstore.save_local(FAISS_PATH)
    return vectorstore
|