Spaces: Sleeping
Update PDF_Reader.py
Browse files - PDF_Reader.py +2 -1
PDF_Reader.py
CHANGED
|
@@ -2,6 +2,7 @@ from langchain_experimental.text_splitter import SemanticChunker
|
|
| 2 |
from langchain_chroma import Chroma
|
| 3 |
from langchain_community.document_loaders import PyPDFLoader
|
| 4 |
from langchain.embeddings import HuggingFaceEmbeddings
|
|
|
|
| 5 |
embedding_modelPath = "sentence-transformers/all-MiniLM-l6-v2"
|
| 6 |
embeddings = HuggingFaceEmbeddings(model_name=embedding_modelPath,model_kwargs = {'device':'cpu'},encode_kwargs = {'normalize_embeddings': False})
|
| 7 |
|
|
@@ -20,7 +21,7 @@ def replace_t_with_space(list_of_documents):
|
|
| 20 |
doc.page_content = doc.page_content.replace('\t', ' ') # Replace tabs with spaces
|
| 21 |
return list_of_documents
|
| 22 |
|
| 23 |
-
def read_pdf(
|
| 24 |
loader = PyPDFLoader(pdf_path)
|
| 25 |
docs = loader.load()
|
| 26 |
print("Total Documents :",len(docs))
|
|
|
|
| 2 |
from langchain_chroma import Chroma
|
| 3 |
from langchain_community.document_loaders import PyPDFLoader
|
| 4 |
from langchain.embeddings import HuggingFaceEmbeddings
|
| 5 |
+
|
| 6 |
embedding_modelPath = "sentence-transformers/all-MiniLM-l6-v2"
|
| 7 |
embeddings = HuggingFaceEmbeddings(model_name=embedding_modelPath,model_kwargs = {'device':'cpu'},encode_kwargs = {'normalize_embeddings': False})
|
| 8 |
|
|
|
|
| 21 |
doc.page_content = doc.page_content.replace('\t', ' ') # Replace tabs with spaces
|
| 22 |
return list_of_documents
|
| 23 |
|
| 24 |
+
def read_pdf(pdf_path):
|
| 25 |
loader = PyPDFLoader(pdf_path)
|
| 26 |
docs = loader.load()
|
| 27 |
print("Total Documents :",len(docs))
|