Spaces:
Build error
Build error
Farid Karimli
committed on
Commit
·
39c29a9
1
Parent(s):
0339679
PyMUPDFReader fix and cleanup
Browse files
code/modules/dataloader/data_loader.py
CHANGED
|
@@ -105,11 +105,7 @@ class FileReader:
|
|
| 105 |
return text
|
| 106 |
|
| 107 |
def read_pdf(self, temp_file_path: str):
|
| 108 |
-
|
| 109 |
-
documents = self.pdf_reader.parse(temp_file_path) # asyncio.run(self.pdf_reader.parse(temp_file_path)) if using async
|
| 110 |
-
else:
|
| 111 |
-
loader = self.pdf_reader.get_loader(temp_file_path)
|
| 112 |
-
documents = self.pdf_reader.get_documents(loader)
|
| 113 |
return documents
|
| 114 |
|
| 115 |
def read_txt(self, temp_file_path: str):
|
|
@@ -289,7 +285,6 @@ class ChunkProcessor:
|
|
| 289 |
)
|
| 290 |
self.document_chunks_full.extend(document_chunks)
|
| 291 |
|
| 292 |
-
print(f"Processed {file_path}. File_data: {file_data}")
|
| 293 |
self.document_data[file_path] = file_data
|
| 294 |
self.document_metadata[file_path] = file_metadata
|
| 295 |
|
|
|
|
| 105 |
return text
|
| 106 |
|
| 107 |
def read_pdf(self, temp_file_path: str):
|
| 108 |
+
documents = self.pdf_reader.parse(temp_file_path)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 109 |
return documents
|
| 110 |
|
| 111 |
def read_txt(self, temp_file_path: str):
|
|
|
|
| 285 |
)
|
| 286 |
self.document_chunks_full.extend(document_chunks)
|
| 287 |
|
|
|
|
| 288 |
self.document_data[file_path] = file_data
|
| 289 |
self.document_metadata[file_path] = file_metadata
|
| 290 |
|