Medical-Chatbot / src /helper.py
Kishore200630's picture
Upload 12 files
9107f15 verified
raw
history blame contribute delete
810 Bytes
from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
def load_pdf_file(data):
    """Load the PDF files found in the directory ``data``.

    A ``DirectoryLoader`` is pointed at ``data`` with the ``*.pdf`` glob
    pattern, and ``PyPDFLoader`` is supplied as the per-file loader class.

    Args:
        data: Directory path handed straight to ``DirectoryLoader``.

    Returns:
        Whatever ``DirectoryLoader.load()`` produces (presumably a list of
        LangChain document objects -- confirm against the library docs).
    """
    pdf_loader_options = {"glob": "*.pdf", "loader_cls": PyPDFLoader}
    return DirectoryLoader(data, **pdf_loader_options).load()
def text_split(extracted_data, *, chunk_size: int = 500, chunk_overlap: int = 20):
    """Split already-loaded documents into smaller chunks.

    The chunking parameters used to be hard-coded; they are now keyword-only
    arguments whose defaults reproduce the previous behaviour, so existing
    ``text_split(docs)`` calls are unaffected.

    Args:
        extracted_data: The documents to split; passed unchanged to
            ``RecursiveCharacterTextSplitter.split_documents``.
        chunk_size: Forwarded as the splitter's ``chunk_size`` (default 500).
        chunk_overlap: Forwarded as the splitter's ``chunk_overlap``
            (default 20).

    Returns:
        The result of ``split_documents`` (presumably a list of chunked
        document objects -- confirm against the library docs).
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_documents(extracted_data)
def download_hugging_face_embeddings(
    model_name: str = 'sentence-transformers/all-MiniLM-L6-v2',
):
    """Build and return a ``HuggingFaceEmbeddings`` object.

    The model identifier used to be hard-coded; it is now a parameter whose
    default is the previous value, so zero-argument calls behave as before.

    Args:
        model_name: Model identifier forwarded to ``HuggingFaceEmbeddings``.

    Returns:
        The constructed ``HuggingFaceEmbeddings`` instance.

    NOTE(review): this function only constructs the object; whether model
    files are actually downloaded at this point is decided by the library --
    confirm against its documentation.
    """
    return HuggingFaceEmbeddings(model_name=model_name)