cryogenic22 committed on
Commit
4e1e6ae
·
verified ·
1 Parent(s): e3bf196

Update utils/database.py

Browse files
Files changed (1) hide show
  1. utils/database.py +27 -12
utils/database.py CHANGED
@@ -20,6 +20,9 @@ import streamlit as st
20
  import sqlite3
21
  import traceback
22
  import time
 
 
 
23
 
24
  from sqlite3 import Error
25
 
@@ -142,6 +145,8 @@ def verify_vector_store(vector_store):
142
  st.error(f"Vector store verification failed: {e}")
143
  return False
144
 
 
 
145
  def handle_document_upload(uploaded_files):
146
  """Handle document upload with progress tracking."""
147
  try:
@@ -181,18 +186,28 @@ def handle_document_upload(uploaded_files):
181
  status_container.info(f"๐Ÿ”„ Processing document {idx + 1}/{len(uploaded_files)}: {file_name}")
182
  details_container.text(f"๐Ÿ“„ Current file: {file_name}")
183
 
184
- # Read file content
185
- content = uploaded_file.read()
186
-
187
- # Store in database
188
- details_container.text(f"๐Ÿ’พ Storing {file_name} in database...")
189
- doc_id = insert_document(st.session_state.db_conn, file_name, content.decode('utf-8'))
190
- if not doc_id:
191
- status_container.error(f"โŒ Failed to store document: {file_name}")
192
- continue
 
 
 
 
 
 
 
 
 
 
193
 
194
- documents.append(content.decode('utf-8'))
195
- document_names.append(file_name)
196
 
197
  # Update progress
198
  current_progress += progress_per_file
@@ -257,7 +272,7 @@ def handle_document_upload(uploaded_files):
257
  # Clean up progress display after 5 seconds if successful
258
  if st.session_state.get('qa_system'):
259
  time.sleep(5)
260
- progress_container.empty()
261
 
262
  def display_vector_store_info():
263
  """Display information about the current vector store state."""
 
20
  import sqlite3
21
  import traceback
22
  import time
23
+ import io
24
+ import tempfile
25
+ from langchain_community.document_loaders import PyPDFLoader
26
 
27
  from sqlite3 import Error
28
 
 
145
  st.error(f"Vector store verification failed: {e}")
146
  return False
147
 
148
+
149
+
150
  def handle_document_upload(uploaded_files):
151
  """Handle document upload with progress tracking."""
152
  try:
 
186
  status_container.info(f"๐Ÿ”„ Processing document {idx + 1}/{len(uploaded_files)}: {file_name}")
187
  details_container.text(f"๐Ÿ“„ Current file: {file_name}")
188
 
189
+ # Create a temporary file to save the PDF
190
+ with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp_file:
191
+ # Write the uploaded file content to the temporary file
192
+ tmp_file.write(uploaded_file.getvalue())
193
+ tmp_file.flush()
194
+
195
+ # Use PyPDFLoader to load the PDF
196
+ loader = PyPDFLoader(tmp_file.name)
197
+ pdf_documents = loader.load()
198
+
199
+ # Extract text content from the PDF
200
+ content = "\n".join(doc.page_content for doc in pdf_documents)
201
+
202
+ # Store in database
203
+ details_container.text(f"๐Ÿ’พ Storing {file_name} in database...")
204
+ doc_id = insert_document(st.session_state.db_conn, file_name, content)
205
+ if not doc_id:
206
+ status_container.error(f"โŒ Failed to store document: {file_name}")
207
+ continue
208
 
209
+ documents.append(content)
210
+ document_names.append(file_name)
211
 
212
  # Update progress
213
  current_progress += progress_per_file
 
272
  # Clean up progress display after 5 seconds if successful
273
  if st.session_state.get('qa_system'):
274
  time.sleep(5)
275
+ progress_container.empty()
276
 
277
  def display_vector_store_info():
278
  """Display information about the current vector store state."""