Spaces:
Build error
Build error
Update document_processor.py
Browse files - document_processor.py +2 -3
document_processor.py
CHANGED
|
@@ -1,9 +1,8 @@
|
|
| 1 |
-
from langchain_community.document_loaders import PyPDFDirectoryLoader
|
| 2 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
| 3 |
|
| 4 |
-
directory = PyPDFDirectoryLoader("documents/")
|
| 5 |
def read_documents(directory):
|
| 6 |
-
return
|
| 7 |
|
| 8 |
def chunk_data(docs, chunk_size=800, chunk_overlap=40):
|
| 9 |
return RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap).split_documents(docs)
|
|
|
|
| 1 |
+
from langchain_community.document_loaders import PyPDFLoader
|
| 2 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
| 3 |
|
|
|
|
| 4 |
def read_documents(directory):
|
| 5 |
+
return PyPDFLoader(directory).load()
|
| 6 |
|
| 7 |
def chunk_data(docs, chunk_size=800, chunk_overlap=40):
|
| 8 |
return RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap).split_documents(docs)
|