cryogenic22 committed on
Commit
a251126
·
verified ·
1 Parent(s): e557000

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +107 -7
app.py CHANGED
@@ -92,7 +92,90 @@ def display_example_questions():
92
  "⚡ Identify the technical requirements",
93
  "🔍 What are the evaluation criteria?"
94
  ]
95
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96
  def main():
97
  # Set up the page configuration
98
  st.set_page_config(layout="wide", page_title="SYNAPTYX - RFP Analysis Agent")
@@ -139,12 +222,29 @@ def main():
139
  # Process uploads
140
  if uploaded_files:
141
  if 'processed_files' not in st.session_state or uploaded_files != st.session_state.processed_files:
142
- with st.spinner("Processing documents..."):
143
- handle_document_upload(uploaded_files)
144
- st.session_state.processed_files = uploaded_files
145
- st.session_state.chat_ready = True
146
- time.sleep(1)
147
- st.rerun()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
148
 
149
  # Knowledge Base Status
150
  if st.session_state.get('vector_store'):
 
92
  "⚡ Identify the technical requirements",
93
  "🔍 What are the evaluation criteria?"
94
  ]
95
+ def handle_document_upload(uploaded_files, persistence):
96
+ """Handle document upload and processing."""
97
+ try:
98
+ # Initialize progress indicators
99
+ progress = st.progress(0)
100
+ status = st.empty()
101
+
102
+ # Initialize document chunker
103
+ chunker = DocumentChunker(
104
+ chunk_size=1000,
105
+ chunk_overlap=200,
106
+ max_tokens_per_chunk=2000
107
+ )
108
+
109
+ # Process each document
110
+ progress_increment = 100 / len(uploaded_files)
111
+ current_progress = 0
112
+
113
+ document_pairs = []
114
+ for idx, file in enumerate(uploaded_files):
115
+ status.text(f"Processing document {idx + 1}/{len(uploaded_files)}: {file.name}")
116
+
117
+ # Create temporary file
118
+ with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp_file:
119
+ tmp_file.write(file.getvalue())
120
+ tmp_file.flush()
121
+
122
+ # Load and process document
123
+ loader = PyPDFLoader(tmp_file.name)
124
+ pages = loader.load()
125
+ content = "\n".join(page.page_content for page in pages)
126
+
127
+ # Store in database
128
+ doc_id = insert_document(st.session_state.db_conn, file.name, content)
129
+ if not doc_id:
130
+ raise Exception(f"Failed to store document: {file.name}")
131
+
132
+ document_pairs.append((content, file.name))
133
+
134
+ # Clean up temp file
135
+ os.unlink(tmp_file.name)
136
+
137
+ current_progress += progress_increment
138
+ progress.progress(int(current_progress))
139
+
140
+ # Process documents with chunker
141
+ status.text("Chunking documents...")
142
+ chunks, chunk_metadatas = chunker.process_documents(document_pairs)
143
+
144
+ # Generate session ID
145
+ session_id = f"session_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
146
+
147
+ # Save chunks
148
+ persistence.save_chunks(chunks, chunk_metadatas, session_id)
149
+
150
+ # Initialize vector store
151
+ status.text("Creating vector embeddings...")
152
+ vector_store = initialize_faiss(st.session_state.embeddings, chunks, chunk_metadatas)
153
+ if not vector_store:
154
+ raise Exception("Failed to initialize vector store")
155
+
156
+ # Save vector store
157
+ persistence.save_vector_store(vector_store, session_id)
158
+
159
+ # Initialize QA system
160
+ status.text("Setting up QA system...")
161
+ qa_system = initialize_qa_system(vector_store)
162
+ if not qa_system:
163
+ raise Exception("Failed to initialize QA system")
164
+
165
+ # Update session state
166
+ st.session_state.vector_store = vector_store
167
+ st.session_state.qa_system = qa_system
168
+ st.session_state.current_session_id = session_id
169
+
170
+ progress.progress(100)
171
+ status.empty()
172
+
173
+ return True
174
+
175
+ except Exception as e:
176
+ st.error(f"Error processing documents: {str(e)}")
177
+ return False
178
+
179
  def main():
180
  # Set up the page configuration
181
  st.set_page_config(layout="wide", page_title="SYNAPTYX - RFP Analysis Agent")
 
222
  # Process uploads
223
  if uploaded_files:
224
  if 'processed_files' not in st.session_state or uploaded_files != st.session_state.processed_files:
225
+ try:
226
+ with st.spinner("Processing documents..."):
227
+ # Initialize components first
228
+ if 'persistence' not in st.session_state:
229
+ st.session_state.persistence = PersistenceManager()
230
+
231
+ # Process documents
232
+ success = handle_document_upload(
233
+ uploaded_files=uploaded_files,
234
+ persistence=st.session_state.persistence # Pass persistence manager as parameter
235
+ )
236
+
237
+ if success:
238
+ st.session_state.processed_files = uploaded_files
239
+ st.session_state.chat_ready = True
240
+ st.success("Documents processed successfully!")
241
+ time.sleep(1)
242
+ st.rerun()
243
+ else:
244
+ st.error("Failed to process documents. Please try again.")
245
+ except Exception as e:
246
+ st.error(f"Error during document processing: {str(e)}")
247
+ st.error(traceback.format_exc())
248
 
249
  # Knowledge Base Status
250
  if st.session_state.get('vector_store'):