Update utils.py
Browse files
utils.py
CHANGED
|
@@ -282,7 +282,7 @@ def access_pdf(self, filename):
|
|
| 282 |
return temp_path
|
| 283 |
|
| 284 |
# Besseren Directory-Loader als CustomLoader definieren, der den Inhalt des Dokuments, die Seitenzahlen, die Überschriften und die Pfade zu den Dokumenten extrahiert
|
| 285 |
-
def create_directory_loader(file_type, directory_path):
|
| 286 |
loaders = {
|
| 287 |
'.pdf': load_pdf_with_metadata,
|
| 288 |
'.word': load_word_with_metadata,
|
|
@@ -307,9 +307,10 @@ def create_directory_loader(file_type, directory_path):
|
|
| 307 |
|
| 308 |
return CustomLoader(directory_path, file_type, loaders[file_type])
|
| 309 |
"""
|
| 310 |
-
|
| 311 |
-
|
| 312 |
self.file_type = file_type
|
|
|
|
| 313 |
self.loader_func = loader_func
|
| 314 |
|
| 315 |
def load(self):
|
|
@@ -437,8 +438,11 @@ def document_loading_splitting():
|
|
| 437 |
|
| 438 |
|
| 439 |
# Erstellen von DirectoryLoader für jeden Dateityp
|
| 440 |
-
pdf_loader = create_directory_loader('.pdf', CHROMA_PDF)
|
| 441 |
-
word_loader = create_directory_loader('.word', CHROMA_WORD)
|
|
|
|
|
|
|
|
|
|
| 442 |
|
| 443 |
|
| 444 |
|
|
|
|
| 282 |
return temp_path
|
| 283 |
|
| 284 |
# Besseren Directory-Loader als CustomLoader definieren, der den Inhalt des Dokuments, die Seitenzahlen, die Überschriften und die Pfade zu den Dokumenten extrahiert
|
| 285 |
+
def create_custom_loader(file_type, file_list): #create_directory_loader(file_type, directory_path):
|
| 286 |
loaders = {
|
| 287 |
'.pdf': load_pdf_with_metadata,
|
| 288 |
'.word': load_word_with_metadata,
|
|
|
|
| 307 |
|
| 308 |
return CustomLoader(directory_path, file_type, loaders[file_type])
|
| 309 |
"""
|
| 310 |
+
|
| 311 |
+
def __init__(self, file_type, file_list, loader_func):
|
| 312 |
self.file_type = file_type
|
| 313 |
+
self.file_list = file_list
|
| 314 |
self.loader_func = loader_func
|
| 315 |
|
| 316 |
def load(self):
|
|
|
|
| 438 |
|
| 439 |
|
| 440 |
# Erstellen von DirectoryLoader für jeden Dateityp
|
| 441 |
+
# pdf_loader = create_directory_loader('.pdf', CHROMA_PDF)
|
| 442 |
+
#word_loader = create_directory_loader('.word', CHROMA_WORD)
|
| 443 |
+
|
| 444 |
+
pdf_loader = create_custom_loader('.pdf', pdf_files)
|
| 445 |
+
word_loader = create_custom_loader('.docx', word_files)
|
| 446 |
|
| 447 |
|
| 448 |
|