File size: 6,656 Bytes
edac567
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
import streamlit as st
import os
import tempfile
import hashlib
from components.file_uploader import FileUploader
from components.pdf_viewer import PdfViewer
from components.chat_interface import ChatInterface
from services.pdf_processor import PDFProcessor
from services.embedding_service import EmbeddingService
from services.gemini_client import GeminiClient
from services.rag_service import RAGService

def initialize_session_state():
    """Seed ``st.session_state`` with a default for every key it does not hold yet.

    Keys that are already present keep their current value, so calling this at
    the start of every script run never overwrites existing session data.
    """
    initial_values = dict(
        chat_history=[],
        pdf_processed=False,
        pdf_id=None,
        pdf_chunks=[],
        uploaded_file_path=None,
        current_file_name=None,
        current_file_hash=None,
        processing=False,
        streaming=False,
        chat_input="",
        # Flag checked in main(): when true, chat_input is blanked and the
        # flag is switched back off.
        clear_chat_input=False,
    )
    session = st.session_state
    for key in initial_values:
        if key in session:
            continue
        session[key] = initial_values[key]

def reset_app_state(embedding_service: EmbeddingService) -> None:
    """Discard everything tied to the previously uploaded PDF.

    Deletes the temporary file recorded in the session, asks the vector store
    to clear its embeddings, and puts the PDF- and chat-related session keys
    back to their initial values. ``chat_input`` and ``clear_chat_input`` are
    not touched.

    Both cleanup steps are best-effort: a failure is logged as a warning and
    never raised, so the session keys are always reset.

    Args:
        embedding_service: Service whose ``vector_store.clear_embeddings()``
            is called to drop the stored embeddings.
    """
    # Function-scope import keeps this block self-contained.
    import logging

    log = logging.getLogger(__name__)

    old_path = st.session_state.get("uploaded_file_path")
    if old_path:
        # EAFP instead of exists()+unlink(): no window between check and delete.
        try:
            os.unlink(old_path)
        except FileNotFoundError:
            pass  # Already gone, which is the state we want.
        except Exception as exc:
            log.warning("Could not delete temporary PDF %s: %s", old_path, exc)

    try:
        embedding_service.vector_store.clear_embeddings()
    except Exception:
        # Still best-effort, but leave a trace: embeddings that were not
        # cleared stay in the store alongside the next PDF's.
        log.warning("Could not clear stored embeddings", exc_info=True)

    st.session_state.chat_history = []
    st.session_state.pdf_processed = False
    st.session_state.pdf_id = None
    st.session_state.pdf_chunks = []
    st.session_state.uploaded_file_path = None
    st.session_state.current_file_name = None
    st.session_state.current_file_hash = None
    st.session_state.processing = False
    st.session_state.streaming = False

def _file_hash(uploaded_file) -> str:
    """Return the hexadecimal MD5 digest of the bytes from ``uploaded_file.getvalue()``."""
    digest = hashlib.md5()
    digest.update(uploaded_file.getvalue())
    return digest.hexdigest()

def auto_process_pdf(uploaded_file, tmp_file_path, embedding_service: EmbeddingService, force: bool = False):
    """Chunk the PDF, store its embeddings and mark the session as ready to chat.

    Returns immediately when the session already flags a processing run, or
    when the PDF is already processed and ``force`` is false. Progress and
    failures are shown through a single ``st.empty()`` placeholder; exceptions
    are reported there instead of being raised. The ``processing`` flag is
    always cleared on exit.

    Args:
        uploaded_file: Uploaded file object; only its ``name`` is read here,
            to derive the PDF id.
        tmp_file_path: Path of the on-disk PDF handed to ``PDFProcessor``.
        embedding_service: Service whose ``store_pdf_embeddings`` receives the
            derived id and the extracted chunks.
        force: Process even when ``pdf_processed`` is already true.
    """
    if st.session_state.processing:
        return
    if st.session_state.pdf_processed and not force:
        return

    st.session_state.processing = True
    status = st.empty()
    try:
        status.markdown("⏳ Processing PDF... 10%")
        chunks = PDFProcessor().process_pdf(tmp_file_path)
        if not chunks:
            status.error("Failed to extract text.")
            return

        # Drop ".pdf", then make the rest identifier-like (spaces/dots -> "_").
        pdf_id = uploaded_file.name.replace(".pdf", "").replace(" ", "_").replace(".", "_")

        status.markdown("⏳ Processing PDF... 50%")
        embedding_service.store_pdf_embeddings(pdf_id, chunks)
        status.markdown("⏳ Processing PDF... 90%")

        # Publish to the session only once the embeddings are stored, so a
        # failed store never leaves an id/chunks pair that was not indexed.
        st.session_state.pdf_id = pdf_id
        st.session_state.pdf_chunks = chunks
        st.session_state.pdf_processed = True
        status.success(f"✅ Processing complete (100%). {len(chunks)} chunks ready.")
    except Exception as e:
        status.error(f"❌ Error: {e}")
    finally:
        st.session_state.processing = False

def _persist_upload(uploaded_file, reuse_existing: bool) -> str:
    """Return the path of an on-disk copy of the upload, writing one only if needed.

    Streamlit re-executes the script on every interaction; without reuse each
    rerun would leave another undeleted temporary file behind.
    """
    existing_path = st.session_state.uploaded_file_path
    if existing_path and os.path.exists(existing_path):
        if reuse_existing:
            return existing_path
        # A different file is replacing this copy: remove it (best-effort).
        try:
            os.unlink(existing_path)
        except OSError:
            pass

    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
        tmp.write(uploaded_file.getvalue())
    st.session_state.uploaded_file_path = tmp.name
    return tmp.name


def main():
    """Render the two-column PDF chat page.

    Left column: file upload, PDF preview, automatic processing and a
    "Reprocess" button. Right column: chat history, question input, streamed
    answer and a "Clear Chat" button; the chat is only shown once the session
    has ``pdf_processed`` set and a ``pdf_id``.
    """
    st.set_page_config(page_title="PDF Chat Assistant", page_icon="📄", layout="wide")
    st.title("📄 PDF Chat Assistant")
    initialize_session_state()

    embedding_service = EmbeddingService()
    gemini_client = GeminiClient()
    rag_service = RAGService(embedding_service, gemini_client)

    col1, col2 = st.columns([1, 1])

    with col1:
        st.header("📁 Upload & Preview PDF")
        uploaded_file = FileUploader().upload_file()
        if uploaded_file:
            new_hash = _file_hash(uploaded_file)
            is_new_file = st.session_state.current_file_hash != new_hash
            if is_new_file:
                # Only wipe state when a *different* file replaces an earlier one.
                if st.session_state.current_file_hash:
                    reset_app_state(embedding_service)
                st.session_state.current_file_name = uploaded_file.name
                st.session_state.current_file_hash = new_hash
                st.session_state.pdf_processed = False

            tmp_path = _persist_upload(uploaded_file, reuse_existing=not is_new_file)
            PdfViewer().display_pdf(tmp_path)
            auto_process_pdf(uploaded_file, tmp_path, embedding_service)
            if st.session_state.pdf_processed and not st.session_state.processing:
                if st.button("🔄 Reprocess PDF"):
                    st.session_state.pdf_processed = False
                    auto_process_pdf(uploaded_file, tmp_path, embedding_service, force=True)
        else:
            st.info("Upload a PDF to begin.")

    with col2:
        st.header("💬 Chat with your PDF")
        if st.session_state.processing:
            st.info("⏳ Processing... Please wait.")
            return

        chat_ui = ChatInterface()

        if st.session_state.pdf_processed and st.session_state.pdf_id:
            # The input's value is reset here, before the text_input widget
            # below is created, when a previous run requested it via the flag.
            if st.session_state.clear_chat_input:
                st.session_state.chat_input = ""
                st.session_state.clear_chat_input = False

            chat_ui.render(st.session_state.chat_history)
            disabled = st.session_state.streaming
            user_input = st.text_input(
                "Ask a question:",
                key="chat_input",
                placeholder="Type your question...",
                disabled=disabled,
                label_visibility="collapsed",
            )
            send = st.button("Send", disabled=disabled or not user_input.strip(), use_container_width=True)

            if send and user_input.strip():
                query = user_input.strip()
                st.session_state.chat_history.append({"role": "user", "content": query})
                st.session_state.streaming = True
                st.session_state.clear_chat_input = True
                try:
                    stream_iter = rag_service.stream_response(
                        query,
                        st.session_state.pdf_id,
                        st.session_state.chat_history,
                    )
                    assistant_text = chat_ui.stream_assistant(st.session_state.chat_history, stream_iter)
                    st.session_state.chat_history.append({"role": "assistant", "content": assistant_text})
                finally:
                    # Always release the flag: if the stream raises, a stuck
                    # True would keep the input and buttons disabled.
                    st.session_state.streaming = False
                st.rerun()

            # Two equal columns; only the first holds a control.
            col_a, _ = st.columns([1, 1])
            with col_a:
                if st.button("Clear Chat", disabled=st.session_state.streaming):
                    st.session_state.chat_history = []
                    st.session_state.clear_chat_input = True
                    st.rerun()
        else:
            st.info("Upload and wait for processing to chat.")

# Run the app only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()