# test2text / app/main_IO.py
# Author: davidepanza
# Last change: "Update app/main_IO.py" (commit 1c7d8a0, verified)
import streamlit as st
import fitz # PyMuPDF
from PIL import Image
import io
import os
# Default value for every key this module manages in ``st.session_state``.
# The helpers below iterate over this mapping to (re)initialise the session.
DEFAULT_SESSION_STATE = {
    # --- PDF upload ---
    "doc": None,
    "uploaded_pdf_name": None,
    "pdf_changed": False,
    "uploaded_pdf_bytes": None,
    "page_range_set": False,
    "page_range_updated": False,
    "full_text": None,
    "pages_data_infos": None,
    # --- Table of contents ---
    "page_choice": None,
    "toc_page_range": None,
    "toc": None,
    # --- Chapters ---
    "chapters_starting_page": None,
    "chapters_dict": None,
    "chapters_extracted": None,
    "chapters_chunked": None,
    "selected_chapter_idx": None,
    "selected_chapter_title": None,
    "num_questions": None,
    "chapter_selected_chunks": None,
    "chapter_prompt": None,
    # --- Topics ---
    "query": None,
    "questions_ready_topic": False,
    # --- Questions ---
    "questions_dict_chapter": None,
    "questions_dict_topic": None,
    "raw_output": None,  # TODO: remove this key (debug only)
    "questions_ready_chapter": False,
    "questions_to_download": {},
}
def initialise_session_state():
    """
    Populate the session state with defaults the first time a session runs.

    The ``'session_initialized'`` key acts as a marker: once it is present,
    later calls leave the session state untouched.
    """
    if 'session_initialized' in st.session_state:
        # Already set up earlier in this session; nothing to do.
        return

    # Fresh session: write every default value.
    for name, value in DEFAULT_SESSION_STATE.items():
        st.session_state[name] = value

    # Start with a new, empty download dict rather than the object stored
    # in DEFAULT_SESSION_STATE.
    st.session_state['questions_to_download'] = {}

    # Record that initialisation has happened.
    st.session_state['session_initialized'] = True
"""
def initialise_session_state():
'Initializes the session state variables if not already set.'
for key, default_val in DEFAULT_SESSION_STATE.items():
if key not in st.session_state:
st.session_state[key] = default_val
"""
def reset_session_state_on_upload():
    """
    Restore the session state keys to their defaults after a new upload.

    Every key in ``DEFAULT_SESSION_STATE`` is overwritten with its default
    value, except ``'questions_to_download'``, which is left as it is.
    """
    for name, value in DEFAULT_SESSION_STATE.items():
        if name == 'questions_to_download':
            # Deliberately preserved across uploads.
            continue
        st.session_state[name] = value
def upload_pdf():
    """
    Render the PDF uploader and store the uploaded file in the session state.

    When the uploader holds a file, its name is compared with
    ``st.session_state['uploaded_pdf_name']``. A different name calls
    ``reset_session_state_on_upload()`` and sets ``'pdf_changed'`` to True;
    the same name sets ``'pdf_changed'`` to False. Non-empty content is then
    saved under ``'uploaded_pdf_bytes'`` and ``'uploaded_pdf_name'``; empty
    content produces an error message and nothing is stored.

    When the uploader is empty, a success message is shown if PDF bytes are
    still present in the session state, otherwise the user is prompted to
    upload a file.
    """
    uploaded_file = st.file_uploader("Upload your PDF", type=["pdf"], label_visibility="collapsed")

    if uploaded_file is None:
        if st.session_state.get('uploaded_pdf_bytes') is not None:
            # Nothing in the widget, but a previously stored PDF is available.
            st.success("File uploaded successfully!")
        else:
            st.info("Please upload a PDF file to proceed.")
        return

    is_new_file = uploaded_file.name != st.session_state.get('uploaded_pdf_name')
    if is_new_file:
        # New file detected: drop everything derived from the previous PDF.
        reset_session_state_on_upload()
    st.session_state['pdf_changed'] = is_new_file

    # getvalue() returns the whole buffer regardless of the current stream
    # position, so a buffer that has already been read is not mistaken for
    # an empty file.
    pdf_bytes = uploaded_file.getvalue()
    if pdf_bytes:
        st.session_state['uploaded_pdf_bytes'] = pdf_bytes
        st.session_state['uploaded_pdf_name'] = uploaded_file.name
        st.success(f"File '{uploaded_file.name}' uploaded successfully!")
    else:
        st.error("Uploaded file is empty!")
def show_pdf_preview():
    """
    Show the first page of the stored PDF as an image in the sidebar.

    Reads the PDF bytes from ``st.session_state['uploaded_pdf_bytes']``.
    If no bytes are stored, a hint to upload a PDF is written to the sidebar.
    Otherwise the document is opened with PyMuPDF, page 0 is rendered to PNG
    and displayed. Any exception raised while opening or rendering is
    reported in the sidebar instead of propagating, and the document is
    always closed afterwards.
    """
    # DEFAULT_SESSION_STATE pre-populates this key with None, so the stored
    # value has to be checked rather than the mere presence of the key.
    pdf_bytes = st.session_state.get('uploaded_pdf_bytes')
    if pdf_bytes is None:
        st.sidebar.write("Upload a PDF to see a preview here.")
        return

    doc = None
    try:
        doc = fitz.open(stream=pdf_bytes, filetype="pdf")
        if doc.page_count < 1:
            st.sidebar.error("PDF has no pages!")
            return
        page = doc.load_page(0)
        pix = page.get_pixmap()
        # Round-trip through PNG bytes to obtain a PIL image for Streamlit.
        img = Image.open(io.BytesIO(pix.tobytes("png")))
        st.sidebar.image(img, caption="First page preview", use_container_width=True)
    except Exception as e:
        # Best-effort preview: surface the problem to the user, do not crash.
        st.sidebar.error(f"Failed to open PDF: {e}")
    finally:
        # Runs on the early return above as well, so the document is released.
        if doc is not None:
            doc.close()