Julia Ostheimer
committed on
Commit
·
c50cc21
1
Parent(s):
36de3fa
Add util function to increment page number +1 (as indexing starts at 0)
Browse files
app.py
CHANGED
|
@@ -67,7 +67,7 @@ def get_document_filename(document) -> str:
|
|
| 67 |
Extracts the filename from a document path.
|
| 68 |
|
| 69 |
Args:
|
| 70 |
-
document:
|
| 71 |
|
| 72 |
Returns:
|
| 73 |
str: The extracted document filename.
|
|
@@ -77,6 +77,21 @@ def get_document_filename(document) -> str:
|
|
| 77 |
|
| 78 |
return document_filename
|
| 79 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 80 |
def pretty_source_history_md():
|
| 81 |
unpretty_history = get_metadata()
|
| 82 |
if not unpretty_history:
|
|
@@ -106,7 +121,7 @@ def retrieve(query: str):
|
|
| 106 |
document_metadata_to_display_in_ui.append({
|
| 107 |
"query": query,
|
| 108 |
"filename": [get_document_filename(doc) for doc in retrieved_docs],
|
| 109 |
-
"number_of_pages": [doc
|
| 110 |
"source_text_chunk": [doc.page_content for doc in retrieved_docs]
|
| 111 |
})
|
| 112 |
|
|
|
|
| 67 |
Extracts the filename from a document path.
|
| 68 |
|
| 69 |
Args:
|
| 70 |
+
document: One document retrieved from querying the vector store.
|
| 71 |
|
| 72 |
Returns:
|
| 73 |
str: The extracted document filename.
|
|
|
|
| 77 |
|
| 78 |
return document_filename
|
| 79 |
|
| 80 |
+
def get_chunk_page_number(document) -> str:
    """
    Return the human-readable (1-based) page number a chunk was extracted from.

    The page index stored in the document's metadata starts at 0, so it is
    increased by 1 before being shown to the user.

    Args:
        document: One document retrieved from querying the vector store. It
            must expose a ``metadata`` mapping with a ``"page"`` entry that
            can be converted to ``int``.

    Returns:
        str: The 1-based page number as a string.

    Raises:
        KeyError: If the metadata has no ``"page"`` entry.
        ValueError: If the ``"page"`` value cannot be converted to ``int``.
    """
    # The metadata value is a zero-based page index, not a path.
    zero_based_page = int(document.metadata["page"])

    return str(zero_based_page + 1)
|
| 94 |
+
|
| 95 |
def pretty_source_history_md():
|
| 96 |
unpretty_history = get_metadata()
|
| 97 |
if not unpretty_history:
|
|
|
|
| 121 |
document_metadata_to_display_in_ui.append({
|
| 122 |
"query": query,
|
| 123 |
"filename": [get_document_filename(doc) for doc in retrieved_docs],
|
| 124 |
+
"number_of_pages": [get_chunk_page_number(doc) for doc in retrieved_docs],
|
| 125 |
"source_text_chunk": [doc.page_content for doc in retrieved_docs]
|
| 126 |
})
|
| 127 |
|