Spaces:
Paused
Paused
Update utils/database.py
Browse files- utils/database.py +118 -1
utils/database.py
CHANGED
|
@@ -138,7 +138,124 @@ def verify_vector_store(vector_store):
|
|
| 138 |
except Exception as e:
|
| 139 |
st.error(f"Vector store verification failed: {e}")
|
| 140 |
return False
|
| 141 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 142 |
def initialize_qa_system(vector_store):
|
| 143 |
"""Initialize QA system with proper chat handling."""
|
| 144 |
try:
|
|
|
|
| 138 |
except Exception as e:
|
| 139 |
st.error(f"Vector store verification failed: {e}")
|
| 140 |
return False
|
| 141 |
+
|
| 142 |
+
def handle_document_upload(uploaded_files):
    """Handle document upload with progress tracking.

    Each uploaded file is read, decoded as UTF-8 and stored with
    ``insert_document``.  The texts that were stored successfully are passed
    to ``initialize_faiss``; the resulting vector store is checked with
    ``verify_vector_store`` and handed to ``initialize_qa_system``.  On
    success the results are saved as ``st.session_state.vector_store`` and
    ``st.session_state.qa_system``.  Progress and status messages are shown
    through three Streamlit placeholders.

    Args:
        uploaded_files: Sequence of uploaded file objects exposing ``name``
            and ``read()`` (returning bytes).  An empty or ``None`` value is
            reported and otherwise ignored, leaving session state untouched.

    Returns:
        None.  Failures are reported in the UI instead of being raised.
    """
    # NOTE(review): the lone "π" / "β" prefixes in the status strings below
    # look like emoji that were mis-decoded before this text was captured;
    # the original code points are not recoverable from here, so they are
    # left untouched -- restore them from the source file.

    # Created outside the ``try`` so the ``except`` / ``finally`` clauses can
    # always refer to them.
    progress_container = st.empty()
    status_container = st.empty()
    details_container = st.empty()

    # Guard before any state is reset: avoids a division by zero below and
    # keeps an existing QA session alive when nothing was uploaded.
    if not uploaded_files:
        status_container.error("No documents were uploaded")
        return

    succeeded = False
    try:
        # Initialize progress bar
        progress_bar = progress_container.progress(0)
        status_container.info("π Initializing document processing...")

        # Reset existing states
        if 'vector_store' in st.session_state:
            del st.session_state.vector_store
        if 'qa_system' in st.session_state:
            del st.session_state.qa_system

        # Initialize embeddings (10% progress)
        status_container.info("π Initializing embeddings model...")
        embeddings = get_embeddings_model()
        if not embeddings:
            status_container.error("β Failed to initialize embeddings model")
            return
        progress_bar.progress(10)

        # Process documents (10% -> 80% progress, split evenly across files)
        documents = []
        document_names = []
        total_bytes = 0
        total_files = len(uploaded_files)

        for idx, uploaded_file in enumerate(uploaded_files, start=1):
            file_name = uploaded_file.name
            status_container.info(f"π Processing document {idx}/{total_files}: {file_name}")
            details_container.text(f"π Current file: {file_name}")

            content = uploaded_file.read()
            try:
                # Decode once and reuse the text for both storage and indexing.
                text = content.decode('utf-8')
            except UnicodeDecodeError:
                # A single unreadable file must not abort the whole batch.
                status_container.error(f"Could not decode {file_name} as UTF-8; skipping")
            else:
                details_container.text(f"💾 Storing {file_name} in database...")
                doc_id = insert_document(st.session_state.db_conn, file_name, text)
                if doc_id:
                    documents.append(text)
                    document_names.append(file_name)
                    total_bytes += len(content)
                else:
                    status_container.error(f"β Failed to store document: {file_name}")

            # Integer arithmetic keeps the bar exact (no float drift) and it
            # advances for skipped files too, so it always ends at 80.
            progress_bar.progress(10 + (70 * idx) // total_files)

        if not documents:
            status_container.error("β No documents were successfully processed")
            return

        # Initialize vector store (80-90% progress)
        status_container.info("π Initializing vector store...")
        details_container.text("π Creating vector embeddings...")
        vector_store = initialize_faiss(embeddings, documents, document_names)
        if not vector_store:
            status_container.error("β Failed to initialize vector store")
            return
        progress_bar.progress(90)

        # Verify vector store
        status_container.info("π Verifying document indexing...")
        details_container.text("✨ Performing final checks...")
        if not verify_vector_store(vector_store):
            status_container.error("β Vector store verification failed")
            return

        # Store in session state
        st.session_state.vector_store = vector_store

        # Initialize QA system (90-100% progress)
        status_container.info("π Setting up QA system...")
        qa_system = initialize_qa_system(vector_store)
        if not qa_system:
            status_container.error("β Failed to initialize QA system")
            return

        st.session_state.qa_system = qa_system
        # From here on the upload counts as successful for the cleanup below.
        succeeded = True

        # Complete!
        progress_bar.progress(100)
        status_container.success("✅ Documents processed successfully!")
        summary_lines = (
            "",
            "π **Ready to chat!**",
            f"- Documents loaded: {len(documents)}",
            f"- Total content size: {total_bytes / 1024:.2f} KB",
            "- Vector store initialized",
            "- QA system ready",
            "",
            "You can now start asking questions about your documents!",
            "",
        )
        details_container.markdown("\n".join(summary_lines))

        # Add notification
        st.balloons()

    except Exception as e:
        # Top-level UI boundary: surface the failure instead of crashing the app.
        status_container.error(f"β Error processing documents: {e}")
        details_container.error(traceback.format_exc())

    finally:
        # Clean up progress display after 5 seconds if successful.  A local
        # flag is used so a QA system left over from an earlier run cannot
        # trigger the delay after a failure.
        if succeeded:
            time.sleep(5)
            progress_container.empty()
|
| 258 |
+
|
| 259 |
def initialize_qa_system(vector_store):
|
| 260 |
"""Initialize QA system with proper chat handling."""
|
| 261 |
try:
|