Update utils.py
Browse files
utils.py
CHANGED
|
@@ -282,7 +282,7 @@ def load_pdf_with_metadata(file_path):
|
|
| 282 |
content = page.get_text("text")
|
| 283 |
title = document.metadata.get("title", "Unbekannt")
|
| 284 |
page_number = page_num + 1
|
| 285 |
-
documents.append(Document(content=content, title=title, page=page_number, path=file_path))
|
| 286 |
return documents
|
| 287 |
|
| 288 |
# für Word-Dokumente
|
|
@@ -294,7 +294,7 @@ def load_word_with_metadata(file_path):
|
|
| 294 |
for para in document.paragraphs:
|
| 295 |
content = para.text
|
| 296 |
page_number = 1 # Word-Dokumente haben keine Seitenzahlen in diesem Kontext
|
| 297 |
-
documents.append(Document(content=content, title=title, page=page_number, path=path))
|
| 298 |
return documents
|
| 299 |
|
| 300 |
|
|
|
|
| 282 |
content = page.get_text("text")
|
| 283 |
title = document.metadata.get("title", "Unbekannt")
|
| 284 |
page_number = page_num + 1
|
| 285 |
+
documents.append(Document(content=content, title=title, page=page_number, path=file_path, split_id=None))
|
| 286 |
return documents
|
| 287 |
|
| 288 |
# für Word-Dokumente
|
|
|
|
| 294 |
for para in document.paragraphs:
|
| 295 |
content = para.text
|
| 296 |
page_number = 1 # Word-Dokumente haben keine Seitenzahlen in diesem Kontext
|
| 297 |
+
documents.append(Document(content=content, title=title, page=page_number, path=path, split_id= None))
|
| 298 |
return documents
|
| 299 |
|
| 300 |
|