File size: 10,372 Bytes
420a617
71ba388
e557000
 
 
71ba388
aa01ea5
30e7c2f
 
e557000
c350675
 
 
 
30e7c2f
 
c350675
aa01ea5
e557000
aa01ea5
25c34ad
c350675
 
26bd6f5
71ba388
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c350675
71ba388
 
26bd6f5
71ba388
e3bf196
420a617
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26bd6f5
 
 
 
 
 
 
 
 
 
 
5f090f7
acec4a7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a251126
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5f439e5
25c34ad
 
56b316d
cc2b215
 
 
 
 
 
 
 
 
 
 
 
 
420a617
c350675
26bd6f5
c350675
420a617
 
 
 
c350675
25c34ad
 
 
420a617
25c34ad
 
 
 
 
 
 
 
 
 
 
 
299df73
25c34ad
 
 
a251126
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56b316d
25c34ad
 
 
 
 
 
 
 
 
 
 
 
 
 
cc2b215
25c34ad
 
 
 
 
 
 
 
 
 
 
 
acec4a7
 
 
 
56b316d
cc2b215
25c34ad
c3ceb0f
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
# app.py
import os
import sqlite3
import tempfile
import traceback
from pathlib import Path
from datetime import datetime
import streamlit as st
from langchain_community.document_loaders import PyPDFLoader  # Add this import
from components.chat import display_chat_interface, ensure_embeddings_initialized
from utils.database import (
    display_vector_store_info, 
    handle_document_upload,
    create_connection,
    create_tables,
    insert_document  # Make sure this is imported
)
from utils.persistence import PersistenceManager
from utils.document_chunker import DocumentChunker
from backend import get_embeddings_model, initialize_qa_system, initialize_faiss
import time

def initialize_database():
    """Open the SQLite database and cache the connection in session state.

    On the first call of a Streamlit session this creates the ``data``
    directory if needed, checks that ``data/rfp_analysis.db`` can be opened
    for appending, opens a connection via ``create_connection``, runs
    ``create_tables`` on it and stores it as ``st.session_state.db_conn``.
    Later calls find the cached connection and return immediately.

    Returns:
        bool: ``True`` when a connection is available in session state,
        ``False`` if any step failed. Failures are logged instead of raised
        so the caller decides what to show the user.
    """
    import logging  # function-scope import keeps this block self-contained

    logger = logging.getLogger(__name__)

    try:
        if 'db_conn' in st.session_state:
            return True

        data_dir = "data"
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(data_dir, exist_ok=True)

        db_path = os.path.join(data_dir, 'rfp_analysis.db')

        # Opening in append mode creates the file if it is missing and proves
        # the location is writable, without touching existing content.
        try:
            with open(db_path, 'a'):
                pass
        except OSError:
            logger.exception("Database file %s is not writable", db_path)
            return False

        conn = create_connection(db_path)
        if conn is None:
            logger.error("create_connection returned no connection for %s", db_path)
            return False

        create_tables(conn)
        st.session_state.db_conn = conn
        return True

    except Exception:
        # Top-level boundary: report failure to the caller, but keep the
        # traceback in the log so the cause is not lost.
        logger.exception("Unexpected error while initializing the database")
        return False

def initialize_embeddings():
    """Make sure an embeddings model is stored in the Streamlit session.

    If ``st.session_state`` already has an ``embeddings`` entry nothing is
    loaded. Otherwise the model is requested from ``get_embeddings_model``
    while a spinner is shown, and stored on success.

    Returns:
        bool: ``True`` if the session holds an embeddings model afterwards,
        ``False`` if the loader returned ``None`` or raised; in both failure
        cases an error message is rendered with ``st.error``.
    """
    try:
        # Fast path: a previous run of the script already loaded the model.
        if 'embeddings' in st.session_state:
            return True

        with st.spinner("Initializing embeddings model..."):
            model = get_embeddings_model()
            if model is None:
                st.error("Failed to initialize embeddings model.")
                return False

            st.session_state.embeddings = model
            return True
    except Exception as exc:
        st.error(f"Error initializing embeddings: {str(exc)}")
        return False

def display_header():
    """Render the page header: logo in a narrow column, title in a wide one.

    The two columns are created with a 1:4 width ratio. When the logo file
    is missing, an error message is rendered in its place.
    """
    logo_path = "img/logo.png"
    logo_column, title_column = st.columns([1, 4])

    with logo_column:
        if not os.path.exists(logo_path):
            st.error("Logo not found at img/logo.png")
        else:
            st.image(logo_path, width=100)

    with title_column:
        st.title("Synaptyx.AI - RFP Analysis Agent")
        
def display_example_questions():
    """Build the list of sample prompts suggested to the user.

    Despite the name, nothing is rendered here; the caller is responsible
    for displaying the returned strings.

    Returns:
        list[str]: Ten example questions, newly allocated on every call.
    """
    example_prompts = (
        "πŸ“Š Summarize the main points of the document",
        "πŸ“ Draft a 'Why Us' section based on the document",
        "🎯 Extract key success metrics and outcomes",
        "πŸ’‘ What are the innovative solutions mentioned?",
        "🀝 Analyze the partnership benefits described",
        "πŸ“ˆ What are the key performance requirements?",
        "πŸ’° Extract budget and pricing information",
        "πŸ“… What are the important deadlines and milestones?",
        "⚑ Identify the technical requirements",
        "πŸ” What are the evaluation criteria?",
    )
    return list(example_prompts)
def _extract_pdf_text(file):
    """Return the page texts of an uploaded PDF joined with newlines.

    The upload is written to a temporary ``.pdf`` file because the loader is
    given a filesystem path. The temporary file is always removed, including
    when loading fails.
    """
    tmp_path = None
    try:
        with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp_file:
            tmp_path = tmp_file.name
            tmp_file.write(file.getvalue())
        # The handle is closed before loading: a named temporary file cannot
        # be opened a second time while still open on some platforms.
        pages = PyPDFLoader(tmp_path).load()
        return "\n".join(page.page_content for page in pages)
    finally:
        if tmp_path is not None:
            try:
                os.unlink(tmp_path)
            except OSError:
                # Best-effort cleanup; a leftover temp file must not mask
                # the error (if any) that is already propagating.
                pass


def handle_document_upload(uploaded_files, persistence):
    """Ingest uploaded PDFs and build the retrieval/QA pipeline for them.

    For each file the text is extracted and stored with ``insert_document``.
    All documents are then chunked, the chunks and a FAISS vector store are
    saved through ``persistence``, a QA system is initialized, and the
    results are placed in ``st.session_state`` (``vector_store``,
    ``qa_system``, ``current_session_id``).

    NOTE(review): this definition shadows the ``handle_document_upload``
    imported from ``utils.database`` at the top of the file - confirm which
    implementation is meant to be used.

    Args:
        uploaded_files: Sequence of uploaded files; each item must expose
            ``name`` and ``getvalue()``.
        persistence: Object providing ``save_chunks(chunks, metadatas,
            session_id)`` and ``save_vector_store(vector_store, session_id)``.

    Returns:
        bool: ``True`` on success. ``False`` when there is nothing to process
        or any step fails; the failure is reported in the UI and no
        exception is propagated to the caller.
    """
    # Guard first: an empty upload list would otherwise surface as a
    # confusing division-by-zero error from the progress computation.
    if not uploaded_files:
        st.warning("No documents to process.")
        return False

    try:
        # Initialize progress indicators
        progress = st.progress(0)
        status = st.empty()

        # Initialize document chunker
        chunker = DocumentChunker(
            chunk_size=1000,
            chunk_overlap=200,
            max_tokens_per_chunk=2000
        )

        total = len(uploaded_files)
        document_pairs = []
        for position, file in enumerate(uploaded_files, start=1):
            status.text(f"Processing document {position}/{total}: {file.name}")

            content = _extract_pdf_text(file)

            # Store in database
            doc_id = insert_document(st.session_state.db_conn, file.name, content)
            if not doc_id:
                raise RuntimeError(f"Failed to store document: {file.name}")

            document_pairs.append((content, file.name))

            # Integer arithmetic keeps the value inside 0..100 without the
            # drift of repeatedly adding a float increment.
            progress.progress(position * 100 // total)

        # Process documents with chunker
        status.text("Chunking documents...")
        chunks, chunk_metadatas = chunker.process_documents(document_pairs)

        # Session ID is a second-resolution timestamp
        session_id = f"session_{datetime.now().strftime('%Y%m%d_%H%M%S')}"

        # Save chunks
        persistence.save_chunks(chunks, chunk_metadatas, session_id)

        # Initialize vector store
        status.text("Creating vector embeddings...")
        vector_store = initialize_faiss(st.session_state.embeddings, chunks, chunk_metadatas)
        if not vector_store:
            raise RuntimeError("Failed to initialize vector store")

        # Save vector store
        persistence.save_vector_store(vector_store, session_id)

        # Initialize QA system
        status.text("Setting up QA system...")
        qa_system = initialize_qa_system(vector_store)
        if not qa_system:
            raise RuntimeError("Failed to initialize QA system")

        # Publish results only after every step succeeded
        st.session_state.vector_store = vector_store
        st.session_state.qa_system = qa_system
        st.session_state.current_session_id = session_id

        progress.progress(100)
        status.empty()

        return True

    except Exception as e:
        st.error(f"Error processing documents: {str(e)}")
        return False
        
def _process_new_uploads(uploaded_files):
    """Run the upload pipeline when the uploader holds an unprocessed file set."""
    if not uploaded_files:
        return

    is_unprocessed = (
        'processed_files' not in st.session_state
        or uploaded_files != st.session_state.processed_files
    )
    if not is_unprocessed:
        return

    try:
        with st.spinner("Processing documents..."):
            # The persistence manager is created once and reused afterwards.
            if 'persistence' not in st.session_state:
                st.session_state.persistence = PersistenceManager()

            success = handle_document_upload(
                uploaded_files=uploaded_files,
                persistence=st.session_state.persistence,
            )

            if not success:
                st.error("Failed to process documents. Please try again.")
            else:
                st.session_state.processed_files = uploaded_files
                st.session_state.chat_ready = True
                st.success("Documents processed successfully!")
                # Brief pause before the script is rerun.
                time.sleep(1)
                st.rerun()
    except Exception as e:
        st.error(f"Error during document processing: {str(e)}")
        st.error(traceback.format_exc())


def _render_welcome_screen():
    """Render the landing view shown until documents have been processed."""
    st.title("πŸ€– SYNAPTYX - RFP Analysis Agent")
    st.markdown("### Welcome to your AI-powered RFP analysis assistant!")

    steps_column, examples_column = st.columns(2)

    with steps_column:
        st.markdown("""
            #### Getting Started:
            1. Upload your RFP documents using the sidebar
            2. Wait for the processing to complete
            3. Start chatting with your documents!
            """)

    with examples_column:
        st.markdown("#### Example Questions You Can Ask:")
        for example in display_example_questions():
            st.markdown(f"{example}")


def main():
    """Entry point of the Streamlit app.

    Configures the page, injects the sidebar logo CSS, initializes the
    database and the embeddings model (stopping with an error message if
    either fails), renders the document-management sidebar, and then shows
    either the welcome screen or the chat interface depending on
    ``st.session_state.chat_ready``.
    """
    st.set_page_config(layout="wide", page_title="SYNAPTYX - RFP Analysis Agent")

    # Custom CSS for logo positioning
    sidebar_logo_css = """
        <style>
        [data-testid="stSidebarNav"] {
            background-image: url('img/logo.png');
            background-repeat: no-repeat;
            background-position: 20px 20px;
            background-size: 150px auto;
            padding-top: 120px;
        }
        </style>
        """
    st.markdown(sidebar_logo_css, unsafe_allow_html=True)

    # Both prerequisites must be available before any UI is drawn.
    if not initialize_database():
        st.error("Failed to initialize database. Please contact support.")
        return

    if not initialize_embeddings():
        st.error("Failed to initialize embeddings model. Please try refreshing the page.")
        return

    # The chat view stays hidden until an upload has been processed.
    if 'chat_ready' not in st.session_state:
        st.session_state.chat_ready = False

    with st.sidebar:
        st.title("πŸ“š Document Manager")

        st.header("Upload Documents", anchor=False)
        uploaded_files = st.file_uploader(
            "Upload PDF documents",
            type=['pdf'],
            accept_multiple_files=True,
            help="Limit 200MB per file β€’ PDF"
        )

        _process_new_uploads(uploaded_files)

        # Knowledge base status
        if st.session_state.get('vector_store'):
            st.success("βœ… Documents ready for analysis")
            display_vector_store_info()

        # Names of the files currently held by the uploader
        if uploaded_files:
            st.subheader("πŸ“‘ Uploaded Documents")
            for doc in uploaded_files:
                st.write(f"β€’ {doc.name}")

    # Main area: chat once documents are ready, welcome screen otherwise.
    if st.session_state.chat_ready:
        display_chat_interface()
    else:
        _render_welcome_screen()

# Start the app only when this file is run as a script, not when it is imported.
if __name__ == "__main__":
    main()