Julia Ostheimer committed on
Commit
c50cc21
·
1 Parent(s): 36de3fa

Add util function to increment page number +1 (as indexing starts at 0)

Browse files
Files changed (1) hide show
  1. app.py +17 -2
app.py CHANGED
@@ -67,7 +67,7 @@ def get_document_filename(document) -> str:
67
  Extracts the filename from a document path.
68
 
69
  Args:
70
- document: Once document retrieved from querying the vector store.
71
 
72
  Returns:
73
  str: The extracted document filename.
@@ -77,6 +77,21 @@ def get_document_filename(document) -> str:
77
 
78
  return document_filename
79
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
  def pretty_source_history_md():
81
  unpretty_history = get_metadata()
82
  if not unpretty_history:
@@ -106,7 +121,7 @@ def retrieve(query: str):
106
  document_metadata_to_display_in_ui.append({
107
  "query": query,
108
  "filename": [get_document_filename(doc) for doc in retrieved_docs],
109
- "number_of_pages": [doc.metadata["page"] for doc in retrieved_docs],
110
  "source_text_chunk": [doc.page_content for doc in retrieved_docs]
111
  })
112
 
 
67
  Extracts the filename from a document path.
68
 
69
  Args:
70
+ document: One document retrieved from querying the vector store.
71
 
72
  Returns:
73
  str: The extracted document filename.
 
77
 
78
  return document_filename
79
 
80
def get_chunk_page_number(document) -> str:
    """
    Returns the 1-based page number of the page a chunk was extracted from.

    The "page" entry of the document metadata starts counting at 0, so it is
    increased by 1 to obtain the page number to display.

    Args:
        document: One document retrieved from querying the vector store.
            Its ``metadata`` must provide a "page" entry that can be
            converted to an integer.

    Returns:
        str: The 1-based page number as string.

    Raises:
        KeyError: If ``metadata`` is a dict without a "page" entry.
        ValueError: If the "page" entry is a string that is not an integer.
    """
    # The stored value is a page index, not a path; convert defensively with
    # int() because the metadata may carry it as a string.
    zero_based_page_index = int(document.metadata["page"])

    return str(zero_based_page_index + 1)
94
+
95
  def pretty_source_history_md():
96
  unpretty_history = get_metadata()
97
  if not unpretty_history:
 
121
  document_metadata_to_display_in_ui.append({
122
  "query": query,
123
  "filename": [get_document_filename(doc) for doc in retrieved_docs],
124
+ "number_of_pages": [get_chunk_page_number(doc) for doc in retrieved_docs],
125
  "source_text_chunk": [doc.page_content for doc in retrieved_docs]
126
  })
127