Spaces:
Paused
Paused
Update utils/database.py
Browse files- utils/database.py +27 -12
utils/database.py
CHANGED
|
@@ -20,6 +20,9 @@ import streamlit as st
|
|
| 20 |
import sqlite3
|
| 21 |
import traceback
|
| 22 |
import time
|
|
|
|
|
|
|
|
|
|
| 23 |
|
| 24 |
from sqlite3 import Error
|
| 25 |
|
|
@@ -142,6 +145,8 @@ def verify_vector_store(vector_store):
|
|
| 142 |
st.error(f"Vector store verification failed: {e}")
|
| 143 |
return False
|
| 144 |
|
|
|
|
|
|
|
| 145 |
def handle_document_upload(uploaded_files):
|
| 146 |
"""Handle document upload with progress tracking."""
|
| 147 |
try:
|
|
@@ -181,18 +186,28 @@ def handle_document_upload(uploaded_files):
|
|
| 181 |
status_container.info(f"๐ Processing document {idx + 1}/{len(uploaded_files)}: {file_name}")
|
| 182 |
details_container.text(f"๐ Current file: {file_name}")
|
| 183 |
|
| 184 |
-
#
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 193 |
|
| 194 |
-
|
| 195 |
-
|
| 196 |
|
| 197 |
# Update progress
|
| 198 |
current_progress += progress_per_file
|
|
@@ -257,7 +272,7 @@ def handle_document_upload(uploaded_files):
|
|
| 257 |
# Clean up progress display after 5 seconds if successful
|
| 258 |
if st.session_state.get('qa_system'):
|
| 259 |
time.sleep(5)
|
| 260 |
-
progress_container.empty()
|
| 261 |
|
| 262 |
def display_vector_store_info():
|
| 263 |
"""Display information about the current vector store state."""
|
|
|
|
| 20 |
import sqlite3
|
| 21 |
import traceback
|
| 22 |
import time
|
| 23 |
+
import io
|
| 24 |
+
import tempfile
|
| 25 |
+
from langchain_community.document_loaders import PyPDFLoader
|
| 26 |
|
| 27 |
from sqlite3 import Error
|
| 28 |
|
|
|
|
| 145 |
st.error(f"Vector store verification failed: {e}")
|
| 146 |
return False
|
| 147 |
|
| 148 |
+
|
| 149 |
+
|
| 150 |
def handle_document_upload(uploaded_files):
|
| 151 |
"""Handle document upload with progress tracking."""
|
| 152 |
try:
|
|
|
|
| 186 |
status_container.info(f"๐ Processing document {idx + 1}/{len(uploaded_files)}: {file_name}")
|
| 187 |
details_container.text(f"๐ Current file: {file_name}")
|
| 188 |
|
| 189 |
+
# Create a temporary file to save the PDF
|
| 190 |
+
with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp_file:
|
| 191 |
+
# Write the uploaded file content to the temporary file
|
| 192 |
+
tmp_file.write(uploaded_file.getvalue())
|
| 193 |
+
tmp_file.flush()
|
| 194 |
+
|
| 195 |
+
# Use PyPDFLoader to load the PDF
|
| 196 |
+
loader = PyPDFLoader(tmp_file.name)
|
| 197 |
+
pdf_documents = loader.load()
|
| 198 |
+
|
| 199 |
+
# Extract text content from the PDF
|
| 200 |
+
content = "\n".join(doc.page_content for doc in pdf_documents)
|
| 201 |
+
|
| 202 |
+
# Store in database
|
| 203 |
+
details_container.text(f"💾 Storing {file_name} in database...")
|
| 204 |
+
doc_id = insert_document(st.session_state.db_conn, file_name, content)
|
| 205 |
+
if not doc_id:
|
| 206 |
+
status_container.error(f"❌ Failed to store document: {file_name}")
|
| 207 |
+
continue
|
| 208 |
|
| 209 |
+
documents.append(content)
|
| 210 |
+
document_names.append(file_name)
|
| 211 |
|
| 212 |
# Update progress
|
| 213 |
current_progress += progress_per_file
|
|
|
|
| 272 |
# Clean up progress display after 5 seconds if successful
|
| 273 |
if st.session_state.get('qa_system'):
|
| 274 |
time.sleep(5)
|
| 275 |
+
progress_container.empty()
|
| 276 |
|
| 277 |
def display_vector_store_info():
|
| 278 |
"""Display information about the current vector store state."""
|