File size: 3,883 Bytes
1d8ed3b
 
 
 
770aec3
1d8ed3b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6eb8469
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1d8ed3b
1c7d8a0
1d8ed3b
 
 
6eb8469
1d8ed3b
 
 
 
 
 
 
 
 
9bf818a
 
1d8ed3b
25b9fab
1d8ed3b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
import streamlit as st
import fitz  # PyMuPDF
from PIL import Image
import io
import os 


# Baseline value for every session-state key used by the app, grouped by the
# feature that owns the key. Insertion order is the order the keys get written
# into st.session_state.
DEFAULT_SESSION_STATE = {
    # --- PDF upload ---------------------------------------------------------
    "doc": None,
    "uploaded_pdf_name": None,
    "pdf_changed": False,
    "uploaded_pdf_bytes": None,
    "page_range_set": False,
    "page_range_updated": False,
    "full_text": None,
    "pages_data_infos": None,

    # --- Table of contents --------------------------------------------------
    "page_choice": None,
    "toc_page_range": None,
    "toc": None,

    # --- Chapters -----------------------------------------------------------
    "chapters_starting_page": None,
    "chapters_dict": None,
    "chapters_extracted": None,
    "chapters_chunked": None,
    "selected_chapter_idx": None,
    "selected_chapter_title": None,
    "num_questions": None,
    "chapter_selected_chunks": None,
    "chapter_prompt": None,

    # --- Topics -------------------------------------------------------------
    "query": None,
    "questions_ready_topic": False,

    # --- Questions ----------------------------------------------------------
    "questions_dict_chapter": None,
    "questions_dict_topic": None,
    "raw_output": None,  # debug only; flagged for removal
    "questions_ready_chapter": False,
    # The only mutable default: this dict object is shared by everything that
    # reads it from this mapping, so it must not be mutated in place.
    "questions_to_download": {},
}


def initialise_session_state():
    """
    Seed ``st.session_state`` with the defaults once per session.

    The 'session_initialized' key is used as a marker: if it is already
    present the function returns without touching anything. Otherwise every
    key from DEFAULT_SESSION_STATE is written, 'questions_to_download' is
    given its own fresh dict, and the marker is set.
    """
    state = st.session_state

    # Already set up for this session: leave the existing values alone.
    if 'session_initialized' in state:
        return

    for name in DEFAULT_SESSION_STATE:
        state[name] = DEFAULT_SESSION_STATE[name]

    # Replace the dict taken from DEFAULT_SESSION_STATE with a new, empty one
    # so this session does not hold the module-level default object.
    state['questions_to_download'] = {}

    state['session_initialized'] = True


"""
def initialise_session_state():
    'Initializes the session state variables if not already set.'
    for key, default_val in DEFAULT_SESSION_STATE.items():
        if key not in st.session_state:
            st.session_state[key] = default_val
"""


def reset_session_state_on_upload():
    """
    Put session state back to its defaults, keeping 'questions_to_download'.

    Every key listed in DEFAULT_SESSION_STATE is overwritten with its default
    value except 'questions_to_download', which is left exactly as it is.
    """
    state = st.session_state
    for name, initial in DEFAULT_SESSION_STATE.items():
        if name == 'questions_to_download':
            # Deliberately skipped so its current contents are not reset.
            continue
        state[name] = initial
    

def upload_pdf():
    """
    Render the PDF uploader and sync the upload-related session state.

    When a file is present in the uploader:
      * if its name differs from the stored 'uploaded_pdf_name', the session
        state is reset via reset_session_state_on_upload() and 'pdf_changed'
        is set to True; otherwise 'pdf_changed' is set to False;
      * its bytes and name are stored under 'uploaded_pdf_bytes' and
        'uploaded_pdf_name', unless the file is empty, in which case an error
        is shown and nothing is stored.

    When the uploader is empty, a success message is shown if bytes from an
    earlier upload are still in session state, otherwise an info prompt.
    """
    uploaded_file = st.file_uploader("Upload your PDF", type=["pdf"], label_visibility="collapsed")

    if uploaded_file is None:
        if st.session_state.get('uploaded_pdf_bytes') is not None:
            # Nothing in the widget, but a previous upload is still stored.
            st.success("File uploaded successfully!")
        else:
            st.info("Please upload a PDF file to proceed.")
        return

    # A different file name is treated as a new document.
    is_new_file = uploaded_file.name != st.session_state.get('uploaded_pdf_name')
    if is_new_file:
        reset_session_state_on_upload()
    st.session_state['pdf_changed'] = is_new_file

    # getvalue() returns the whole buffer regardless of the current stream
    # position, whereas read() yields b"" once the stream has been consumed
    # and would wrongly report a valid upload as empty.
    pdf_bytes = uploaded_file.getvalue()

    if pdf_bytes:
        st.session_state['uploaded_pdf_bytes'] = pdf_bytes
        st.session_state['uploaded_pdf_name'] = uploaded_file.name
        st.success(f"File '{uploaded_file.name}' uploaded successfully!")
    else:
        st.error("Uploaded file is empty!")


def show_pdf_preview():
    """
    Show the first page of the uploaded PDF as an image in the sidebar.

    Reads the PDF bytes from st.session_state['uploaded_pdf_bytes']. If no
    bytes are stored (key missing, None, or empty), a hint asking the user to
    upload a PDF is written to the sidebar instead. Any failure while opening
    or rendering the document is reported in the sidebar rather than raised.
    """
    # Check the stored value, not just the key: initialise_session_state()
    # always creates 'uploaded_pdf_bytes' with a default of None, so a plain
    # membership test would never reach the hint below and would pass None on
    # to fitz.open.
    pdf_bytes = st.session_state.get('uploaded_pdf_bytes')
    if not pdf_bytes:
        st.sidebar.write("Upload a PDF to see a preview here.")
        return

    doc = None
    try:
        doc = fitz.open(stream=pdf_bytes, filetype="pdf")
        if doc.page_count < 1:
            st.sidebar.error("PDF has no pages!")
            return
        # Render page 0 to a PNG in memory and hand it to PIL for display.
        page = doc.load_page(0)
        pix = page.get_pixmap()
        img = Image.open(io.BytesIO(pix.tobytes("png")))
        st.sidebar.image(img, caption="First page preview", use_container_width=True)
    except Exception as e:
        # UI boundary: surface the problem to the user instead of crashing.
        st.sidebar.error(f"Failed to open PDF: {e}")
    finally:
        # Runs on the early return above as well, so the document is always
        # closed once it has been opened.
        if doc is not None:
            doc.close()