Nahiyan14 committed on
Commit
79338e8
·
verified ·
1 Parent(s): ad51c6c

Update src/helper.py

Browse files
Files changed (1) hide show
  1. src/helper.py +28 -29
src/helper.py CHANGED
@@ -1,30 +1,29 @@
1
- from langchain.document_loaders import PyPDFLoader, DirectoryLoader
2
- from langchain.text_splitter import RecursiveCharacterTextSplitter
3
- from langchain.embeddings import HuggingFaceEmbeddings
4
- from sentence_transformers import SentenceTransformer
5
-
6
-
7
- #Extract Data From the PDF File
8
- def load_pdf_file(data):
9
- loader= DirectoryLoader(data,
10
- glob="*.pdf",
11
- loader_cls=PyPDFLoader)
12
-
13
- documents=loader.load()
14
-
15
- return documents
16
-
17
-
18
-
19
- #Split the Data into Text Chunks
20
- def text_split(extracted_data):
21
- text_splitter=RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=20)
22
- text_chunks=text_splitter.split_documents(extracted_data)
23
- return text_chunks
24
-
25
-
26
-
27
- #Download the Embeddings from HuggingFace
28
- def download_hugging_face_embeddings():
29
- embeddings=HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2') #this model return 384 dimensions
30
  return embeddings
 
1
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
2
+ from sentence_transformers import SentenceTransformer
3
+ from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
4
+ from langchain_community.embeddings import HuggingFaceEmbeddings
5
+
6
+ #Extract Data From the PDF File
7
+ def load_pdf_file(data):
8
+ loader= DirectoryLoader(data,
9
+ glob="*.pdf",
10
+ loader_cls=PyPDFLoader)
11
+
12
+ documents=loader.load()
13
+
14
+ return documents
15
+
16
+
17
+
18
+ #Split the Data into Text Chunks
19
+ def text_split(extracted_data):
20
+ text_splitter=RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=20)
21
+ text_chunks=text_splitter.split_documents(extracted_data)
22
+ return text_chunks
23
+
24
+
25
+
26
+ #Download the Embeddings from HuggingFace
27
+ def download_hugging_face_embeddings():
28
+ embeddings=HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2') #this model return 384 dimensions
 
29
  return embeddings