Julia Ostheimer committed on
Commit
36de3fa
·
1 Parent(s): d0bf374

Show the page number of each chunk and omit author and creation date from the source metadata

Browse files
Files changed (1) hide show
  1. app.py +3 -4
app.py CHANGED
@@ -85,11 +85,11 @@ def pretty_source_history_md():
85
  markdown_string = ""
86
  for idx, record in enumerate(unpretty_history):
87
  markdown_string += f"## Query {idx+1}: `{record['query']}`\n"
88
- for filename, author, creation_date, chunk in zip(record["filename"], record["author"], record["creation_date"], record["source_text_chunk"]):
89
  # Clean up chunk: remove newlines and trim spaces
90
  cleaned_chunk = chunk.replace("\n", " ").replace("\r", " ").strip()
91
 
92
- markdown_string += f"- **Dokument**: {filename}, **Autor(en)**: {author}, **Erstellungsdatum**: {creation_date}\n\n> {cleaned_chunk} \n\n"
93
  markdown_string += "---\n"
94
  return markdown_string
95
 
@@ -106,8 +106,7 @@ def retrieve(query: str):
106
  document_metadata_to_display_in_ui.append({
107
  "query": query,
108
  "filename": [get_document_filename(doc) for doc in retrieved_docs],
109
- "author": [doc.metadata["Author"] for doc in retrieved_docs],
110
- "creation_date": [doc.metadata["CreationDate"] for doc in retrieved_docs],
111
  "source_text_chunk": [doc.page_content for doc in retrieved_docs]
112
  })
113
 
 
85
  markdown_string = ""
86
  for idx, record in enumerate(unpretty_history):
87
  markdown_string += f"## Query {idx+1}: `{record['query']}`\n"
88
+ for filename, page, chunk in zip(record["filename"], record["number_of_pages"], record["source_text_chunk"]):
89
  # Clean up chunk: remove newlines and trim spaces
90
  cleaned_chunk = chunk.replace("\n", " ").replace("\r", " ").strip()
91
 
92
+ markdown_string += f"- **Dokument**: {filename}, **Seite**: {page}\n\n> {cleaned_chunk} \n\n"
93
  markdown_string += "---\n"
94
  return markdown_string
95
 
 
106
  document_metadata_to_display_in_ui.append({
107
  "query": query,
108
  "filename": [get_document_filename(doc) for doc in retrieved_docs],
109
+ "number_of_pages": [doc.metadata["page"] for doc in retrieved_docs],
 
110
  "source_text_chunk": [doc.page_content for doc in retrieved_docs]
111
  })
112