Spaces:
Sleeping
Sleeping
File size: 3,883 Bytes
1d8ed3b 770aec3 1d8ed3b 6eb8469 1d8ed3b 1c7d8a0 1d8ed3b 6eb8469 1d8ed3b 9bf818a 1d8ed3b 25b9fab 1d8ed3b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 |
import streamlit as st
import fitz # PyMuPDF
from PIL import Image
import io
import os
# Default value for every session-state key this module manages, grouped by the
# feature that uses it. Insertion order is the order in which the keys are
# written into ``st.session_state``.
DEFAULT_SESSION_STATE = {
    # --- PDF upload ---
    "doc": None,
    "uploaded_pdf_name": None,
    "pdf_changed": False,
    "uploaded_pdf_bytes": None,
    "page_range_set": False,
    "page_range_updated": False,
    "full_text": None,
    "pages_data_infos": None,
    # --- Table of contents ---
    "page_choice": None,
    "toc_page_range": None,
    "toc": None,
    # --- Chapters ---
    "chapters_starting_page": None,
    "chapters_dict": None,
    "chapters_extracted": None,
    "chapters_chunked": None,
    "selected_chapter_idx": None,
    "selected_chapter_title": None,
    "num_questions": None,
    "chapter_selected_chunks": None,
    "chapter_prompt": None,
    # --- Topics ---
    "query": None,
    "questions_ready_topic": False,
    # --- Questions ---
    "questions_dict_chapter": None,
    "questions_dict_topic": None,
    "raw_output": None,  # TODO: remove this (only for debug)
    "questions_ready_chapter": False,
    # The only mutable default: this one dict object lives for as long as the
    # module does.
    "questions_to_download": {},
}
def initialise_session_state():
    """
    Seed ``st.session_state`` with the module defaults, once per session.

    The ``'session_initialized'`` key acts as the marker: when it is already
    present the function does nothing, otherwise every key of
    ``DEFAULT_SESSION_STATE`` is written and the marker is set.
    """
    state = st.session_state
    if 'session_initialized' in state:
        # This session was seeded on an earlier call; keep its values.
        return

    for name in DEFAULT_SESSION_STATE:
        state[name] = DEFAULT_SESSION_STATE[name]

    # Give the session its own dict instead of the shared default object.
    state['questions_to_download'] = {}
    state['session_initialized'] = True


# Earlier variant of initialise_session_state, kept for reference only. It is
# wrapped in a bare string literal, so it is evaluated and discarded and the
# function inside it is never defined.
"""
def initialise_session_state():
'Initializes the session state variables if not already set.'
for key, default_val in DEFAULT_SESSION_STATE.items():
if key not in st.session_state:
st.session_state[key] = default_val
"""
def reset_session_state_on_upload():
    """
    Put the managed session-state keys back to their default values.

    Every key of ``DEFAULT_SESSION_STATE`` is overwritten except
    ``'questions_to_download'``, which keeps whatever it currently holds.
    """
    preserved_key = 'questions_to_download'
    for name, default in DEFAULT_SESSION_STATE.items():
        if name == preserved_key:
            continue
        st.session_state[name] = default
def upload_pdf():
    """
    Render the PDF uploader and copy the uploaded file into session state.

    When a file is present in the widget:
      * if its name differs from ``st.session_state['uploaded_pdf_name']`` the
        session state is reset via ``reset_session_state_on_upload()`` and
        ``'pdf_changed'`` is set to True; otherwise ``'pdf_changed'`` is set
        to False;
      * non-empty content is stored under ``'uploaded_pdf_bytes'`` and the
        name under ``'uploaded_pdf_name'``; empty content shows an error and
        stores nothing.

    When no file is present in the widget, a success message is shown if bytes
    from an earlier upload are still in session state, and an info prompt
    otherwise.

    Returns:
        None. All results are written to ``st.session_state`` / the page.
    """
    uploaded_file = st.file_uploader(
        "Upload your PDF", type=["pdf"], label_visibility="collapsed"
    )

    if uploaded_file is None:
        if st.session_state.get('uploaded_pdf_bytes') is not None:
            st.success("File uploaded successfully!")
        else:
            st.info("Please upload a PDF file to proceed.")
        return

    # NOTE(review): a change is detected by file name only, so a different
    # PDF uploaded under the same name does not trigger a reset.
    if uploaded_file.name != st.session_state.get('uploaded_pdf_name'):
        reset_session_state_on_upload()
        st.session_state['pdf_changed'] = True
    else:
        st.session_state['pdf_changed'] = False

    # getvalue() returns the whole buffer regardless of the current stream
    # position; read() would return b"" if the buffer had already been
    # consumed, producing a false "empty file" error.
    pdf_bytes = uploaded_file.getvalue()
    if not pdf_bytes:
        st.error("Uploaded file is empty!")
        return

    st.session_state['uploaded_pdf_bytes'] = pdf_bytes
    st.session_state['uploaded_pdf_name'] = uploaded_file.name
    st.success(f"File '{uploaded_file.name}' uploaded successfully!")
def show_pdf_preview():
    """
    Show the first page of the uploaded PDF as an image in the sidebar.

    Reads the PDF content from ``st.session_state['uploaded_pdf_bytes']``.
    If no content is stored (key missing or value ``None``), a hint asking
    for an upload is written to the sidebar instead. Any exception raised
    while opening or rendering the document is reported in the sidebar rather
    than propagated, and the document is always closed.

    Returns:
        None. Output goes to ``st.sidebar``.
    """
    # The key can exist with a None value (that is its default in
    # DEFAULT_SESSION_STATE), so test the value rather than key membership.
    pdf_bytes = st.session_state.get('uploaded_pdf_bytes')
    if pdf_bytes is None:
        st.sidebar.write("Upload a PDF to see a preview here.")
        return

    doc = None
    try:
        doc = fitz.open(stream=pdf_bytes, filetype="pdf")
        if doc.page_count < 1:
            st.sidebar.error("PDF has no pages!")
            return

        # Render page 0 to PNG bytes and hand it to Streamlit as a PIL image.
        first_page = doc.load_page(0)
        pixmap = first_page.get_pixmap()
        preview = Image.open(io.BytesIO(pixmap.tobytes("png")))
        st.sidebar.image(
            preview, caption="First page preview", use_container_width=True
        )
    except Exception as e:
        # UI boundary: report the failure in the sidebar instead of letting
        # the exception propagate.
        st.sidebar.error(f"Failed to open PDF: {e}")
    finally:
        # Runs on the early return above as well.
        if doc is not None:
            doc.close()
|