# Chatbot_LLMs / vector_db.py
# Kimty's picture
# Upload file
# e0e6e48
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from config import *
def faiss_vector_db():
    """Build a FAISS vector store from a directory of PDFs and save it to disk.

    The pipeline is:

    1. Load every file matching ``*.pdf`` in ``DATA_DIR_PATH`` with
       ``PyPDFLoader``.
    2. Split the loaded documents into chunks using ``CHUNK_SIZE`` and
       ``CHUNK_OVERLAP``.
    3. Embed the chunks with the HuggingFace model named by ``EMBEDDER``
       (run on CPU) and index them with FAISS.
    4. Persist the index to ``VECTOR_DB_PATH``.

    All upper-case names are expected to come from the ``config`` module.
    Progress messages are printed to stdout after each stage.

    Raises:
        ValueError: If no PDF documents were loaded, or if the loaded
            documents produced no text chunks.
    """
    dir_loader = DirectoryLoader(
        DATA_DIR_PATH,
        glob='*.pdf',
        loader_cls=PyPDFLoader,
    )
    docs = dir_loader.load()
    # Fail early with a clear message: an empty document list would otherwise
    # only blow up later, inside the FAISS index construction.
    if not docs:
        raise ValueError(
            f"No PDF documents could be loaded from {DATA_DIR_PATH!r}"
        )
    print("PDFs Loaded")

    txt_splitter = RecursiveCharacterTextSplitter(
        chunk_size=CHUNK_SIZE,
        chunk_overlap=CHUNK_OVERLAP,
    )
    inp_txt = txt_splitter.split_documents(docs)
    # Documents without extractable text (e.g. image-only PDFs) can yield
    # zero chunks; check before paying the cost of loading the embedder.
    if not inp_txt:
        raise ValueError(
            f"PDFs in {DATA_DIR_PATH!r} produced no text chunks to index"
        )
    print("Data Chunks Created")

    hfembeddings = HuggingFaceEmbeddings(
        model_name=EMBEDDER,
        model_kwargs={'device': 'cpu'},
    )

    db = FAISS.from_documents(inp_txt, hfembeddings)
    db.save_local(VECTOR_DB_PATH)
    print("Vector Store Creation Completed")
# Script entry point: build and save the vector store only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    faiss_vector_db()