from langchain_community.document_loaders import DirectoryLoader, PyPDFLoader from langchain.text_splitter import RecursiveCharacterTextSplitter from langchain_huggingface import HuggingFaceEmbeddings #Extract Data From the PDF File def load_pdf_file(data): loader= DirectoryLoader(data, glob="*.pdf", loader_cls=PyPDFLoader) documents=loader.load() return documents #Split the Data into Text Chunks def text_split(extracted_data): text_splitter=RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=20) text_chunks=text_splitter.split_documents(extracted_data) return text_chunks #Download the Embeddings from Hugging Face def download_hugging_face_embeddings(): embeddings=HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2') return embeddings