Update utils/database.py
Browse files- utils/database.py +82 -65
utils/database.py
CHANGED
|
@@ -97,36 +97,6 @@ def create_tables(conn):
|
|
| 97 |
st.error(f"Error: {e}")
|
| 98 |
|
| 99 |
|
| 100 |
-
def process_document(file_path):
|
| 101 |
-
"""
|
| 102 |
-
Process a PDF document with proper chunking.
|
| 103 |
-
|
| 104 |
-
Args:
|
| 105 |
-
file_path (str): Path to the PDF file.
|
| 106 |
-
Returns:
|
| 107 |
-
tuple: (list of document chunks, full content of the document).
|
| 108 |
-
"""
|
| 109 |
-
# Load PDF
|
| 110 |
-
loader = PyPDFLoader(file_path)
|
| 111 |
-
documents = loader.load()
|
| 112 |
-
|
| 113 |
-
# Create text splitter
|
| 114 |
-
text_splitter = RecursiveCharacterTextSplitter(
|
| 115 |
-
chunk_size=1000,
|
| 116 |
-
chunk_overlap=200,
|
| 117 |
-
length_function=len,
|
| 118 |
-
separators=["\n\n", "\n", " ", ""]
|
| 119 |
-
)
|
| 120 |
-
|
| 121 |
-
# Split documents into chunks
|
| 122 |
-
chunks = text_splitter.split_documents(documents)
|
| 123 |
-
|
| 124 |
-
# Extract text content for database storage
|
| 125 |
-
full_content = "\n".join(doc.page_content for doc in documents)
|
| 126 |
-
|
| 127 |
-
return chunks, full_content
|
| 128 |
-
|
| 129 |
-
|
| 130 |
def get_documents(conn):
|
| 131 |
"""
|
| 132 |
Retrieve all documents from the database.
|
|
@@ -199,12 +169,16 @@ def verify_vector_store(vector_store):
|
|
| 199 |
return False
|
| 200 |
|
| 201 |
|
| 202 |
-
|
| 203 |
-
"""
|
| 204 |
-
Handle document upload with progress tracking.
|
| 205 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 206 |
Args:
|
| 207 |
-
uploaded_files (list): List of uploaded files
|
|
|
|
|
|
|
| 208 |
"""
|
| 209 |
try:
|
| 210 |
# Initialize session state variables if they don't exist
|
|
@@ -213,7 +187,7 @@ def handle_document_upload(uploaded_files):
|
|
| 213 |
if 'vector_store' not in st.session_state:
|
| 214 |
st.session_state.vector_store = None
|
| 215 |
|
| 216 |
-
# Create
|
| 217 |
progress_container = st.empty()
|
| 218 |
status_container = st.empty()
|
| 219 |
details_container = st.empty()
|
|
@@ -223,17 +197,15 @@ def handle_document_upload(uploaded_files):
|
|
| 223 |
status_container.info("π Initializing document processing...")
|
| 224 |
|
| 225 |
# Reset existing states
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
if st.session_state.qa_system is not None:
|
| 229 |
-
st.session_state.qa_system = None
|
| 230 |
|
| 231 |
# Initialize embeddings (10% progress)
|
| 232 |
status_container.info("π Initializing embeddings model...")
|
| 233 |
embeddings = get_embeddings_model()
|
| 234 |
if not embeddings:
|
| 235 |
status_container.error("β Failed to initialize embeddings model")
|
| 236 |
-
return
|
| 237 |
progress_bar.progress(10)
|
| 238 |
|
| 239 |
# Process documents
|
|
@@ -244,6 +216,8 @@ def handle_document_upload(uploaded_files):
|
|
| 244 |
progress_per_file = 70 / len(uploaded_files)
|
| 245 |
current_progress = 10
|
| 246 |
|
|
|
|
|
|
|
| 247 |
for idx, uploaded_file in enumerate(uploaded_files):
|
| 248 |
file_name = uploaded_file.name
|
| 249 |
status_container.info(f"π Processing document {idx + 1}/{len(uploaded_files)}: {file_name}")
|
|
@@ -262,9 +236,18 @@ def handle_document_upload(uploaded_files):
|
|
| 262 |
status_container.error(f"β Failed to store document: {file_name}")
|
| 263 |
continue
|
| 264 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 265 |
# Add chunks with metadata
|
| 266 |
for chunk in chunks:
|
| 267 |
-
chunk.metadata
|
|
|
|
|
|
|
|
|
|
|
|
|
| 268 |
all_chunks.extend(chunks)
|
| 269 |
|
| 270 |
documents.append(content)
|
|
@@ -273,7 +256,7 @@ def handle_document_upload(uploaded_files):
|
|
| 273 |
current_progress += progress_per_file
|
| 274 |
progress_bar.progress(int(current_progress))
|
| 275 |
|
| 276 |
-
# Initialize vector store with chunks
|
| 277 |
status_container.info("π Initializing vector store...")
|
| 278 |
vector_store = FAISS.from_documents(
|
| 279 |
all_chunks,
|
|
@@ -285,55 +268,89 @@ def handle_document_upload(uploaded_files):
|
|
| 285 |
details_container.text("β¨ Performing final checks...")
|
| 286 |
if not verify_vector_store(vector_store):
|
| 287 |
status_container.error("β Vector store verification failed")
|
| 288 |
-
return
|
| 289 |
|
| 290 |
# Initialize QA system (90-100% progress)
|
| 291 |
status_container.info("π Setting up QA system...")
|
| 292 |
qa_system = initialize_qa_system(vector_store)
|
| 293 |
if not qa_system:
|
| 294 |
status_container.error("β Failed to initialize QA system")
|
| 295 |
-
return
|
| 296 |
-
|
| 297 |
-
# Store
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 298 |
st.session_state.qa_system = qa_system
|
| 299 |
|
| 300 |
# Complete!
|
| 301 |
progress_bar.progress(100)
|
| 302 |
status_container.success("β
Documents processed successfully!")
|
| 303 |
details_container.markdown(
|
| 304 |
-
"""
|
| 305 |
π **Ready to chat!**
|
| 306 |
-
- Documents
|
| 307 |
-
- Total content size: {:.2f} KB
|
| 308 |
-
-
|
| 309 |
-
|
| 310 |
-
|
| 311 |
You can now start asking questions about your documents!
|
| 312 |
-
"""
|
| 313 |
-
len(documents),
|
| 314 |
-
sum(len(doc) for doc in documents) / 1024
|
| 315 |
-
)
|
| 316 |
)
|
| 317 |
|
| 318 |
# Add notification
|
| 319 |
st.balloons()
|
| 320 |
|
| 321 |
-
#
|
| 322 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 323 |
|
| 324 |
except Exception as e:
|
| 325 |
-
|
| 326 |
-
|
|
|
|
| 327 |
# Reset states on error
|
| 328 |
st.session_state.vector_store = None
|
| 329 |
st.session_state.qa_system = None
|
| 330 |
st.session_state.chat_ready = False
|
|
|
|
| 331 |
|
| 332 |
-
|
| 333 |
-
|
| 334 |
-
|
| 335 |
-
|
| 336 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 337 |
|
| 338 |
|
| 339 |
def display_vector_store_info():
|
|
|
|
| 97 |
st.error(f"Error: {e}")
|
| 98 |
|
| 99 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
def get_documents(conn):
|
| 101 |
"""
|
| 102 |
Retrieve all documents from the database.
|
|
|
|
| 169 |
return False
|
| 170 |
|
| 171 |
|
| 172 |
+
# utils/database.py
|
|
|
|
|
|
|
| 173 |
|
| 174 |
+
def handle_document_upload(uploaded_files, **kwargs):
|
| 175 |
+
"""
|
| 176 |
+
Handle document upload with progress tracking and collection support.
|
| 177 |
+
|
| 178 |
Args:
|
| 179 |
+
uploaded_files (list): List of uploaded files
|
| 180 |
+
**kwargs: Additional arguments including:
|
| 181 |
+
- collection_id (int, optional): ID of the collection to add documents to
|
| 182 |
"""
|
| 183 |
try:
|
| 184 |
# Initialize session state variables if they don't exist
|
|
|
|
| 187 |
if 'vector_store' not in st.session_state:
|
| 188 |
st.session_state.vector_store = None
|
| 189 |
|
| 190 |
+
# Create progress containers
|
| 191 |
progress_container = st.empty()
|
| 192 |
status_container = st.empty()
|
| 193 |
details_container = st.empty()
|
|
|
|
| 197 |
status_container.info("π Initializing document processing...")
|
| 198 |
|
| 199 |
# Reset existing states
|
| 200 |
+
st.session_state.vector_store = None
|
| 201 |
+
st.session_state.qa_system = None
|
|
|
|
|
|
|
| 202 |
|
| 203 |
# Initialize embeddings (10% progress)
|
| 204 |
status_container.info("π Initializing embeddings model...")
|
| 205 |
embeddings = get_embeddings_model()
|
| 206 |
if not embeddings:
|
| 207 |
status_container.error("β Failed to initialize embeddings model")
|
| 208 |
+
return False
|
| 209 |
progress_bar.progress(10)
|
| 210 |
|
| 211 |
# Process documents
|
|
|
|
| 216 |
progress_per_file = 70 / len(uploaded_files)
|
| 217 |
current_progress = 10
|
| 218 |
|
| 219 |
+
collection_id = kwargs.get('collection_id')
|
| 220 |
+
|
| 221 |
for idx, uploaded_file in enumerate(uploaded_files):
|
| 222 |
file_name = uploaded_file.name
|
| 223 |
status_container.info(f"π Processing document {idx + 1}/{len(uploaded_files)}: {file_name}")
|
|
|
|
| 236 |
status_container.error(f"β Failed to store document: {file_name}")
|
| 237 |
continue
|
| 238 |
|
| 239 |
+
# Add to collection if specified
|
| 240 |
+
if collection_id:
|
| 241 |
+
if not add_document_to_collection(st.session_state.db_conn, doc_id, collection_id):
|
| 242 |
+
status_container.warning(f"β οΈ Failed to add document to collection: {file_name}")
|
| 243 |
+
|
| 244 |
# Add chunks with metadata
|
| 245 |
for chunk in chunks:
|
| 246 |
+
chunk.metadata.update({
|
| 247 |
+
"source": file_name,
|
| 248 |
+
"document_id": doc_id,
|
| 249 |
+
"collection_id": collection_id if collection_id else None
|
| 250 |
+
})
|
| 251 |
all_chunks.extend(chunks)
|
| 252 |
|
| 253 |
documents.append(content)
|
|
|
|
| 256 |
current_progress += progress_per_file
|
| 257 |
progress_bar.progress(int(current_progress))
|
| 258 |
|
| 259 |
+
# Initialize vector store with chunks
|
| 260 |
status_container.info("π Initializing vector store...")
|
| 261 |
vector_store = FAISS.from_documents(
|
| 262 |
all_chunks,
|
|
|
|
| 268 |
details_container.text("β¨ Performing final checks...")
|
| 269 |
if not verify_vector_store(vector_store):
|
| 270 |
status_container.error("β Vector store verification failed")
|
| 271 |
+
return False
|
| 272 |
|
| 273 |
# Initialize QA system (90-100% progress)
|
| 274 |
status_container.info("π Setting up QA system...")
|
| 275 |
qa_system = initialize_qa_system(vector_store)
|
| 276 |
if not qa_system:
|
| 277 |
status_container.error("β Failed to initialize QA system")
|
| 278 |
+
return False
|
| 279 |
+
|
| 280 |
+
# Store in session state
|
| 281 |
+
if collection_id:
|
| 282 |
+
if 'vector_stores' not in st.session_state:
|
| 283 |
+
st.session_state.vector_stores = {}
|
| 284 |
+
st.session_state.vector_stores[collection_id] = vector_store
|
| 285 |
+
else:
|
| 286 |
+
st.session_state.vector_store = vector_store
|
| 287 |
+
|
| 288 |
st.session_state.qa_system = qa_system
|
| 289 |
|
| 290 |
# Complete!
|
| 291 |
progress_bar.progress(100)
|
| 292 |
status_container.success("β
Documents processed successfully!")
|
| 293 |
details_container.markdown(
|
| 294 |
+
f"""
|
| 295 |
π **Ready to chat!**
|
| 296 |
+
- Documents processed: {len(documents)}
|
| 297 |
+
- Total content size: {sum(len(doc) for doc in documents) / 1024:.2f} KB
|
| 298 |
+
- {"Added to collection" if collection_id else "Processed as standalone documents"}
|
| 299 |
+
|
|
|
|
| 300 |
You can now start asking questions about your documents!
|
| 301 |
+
"""
|
|
|
|
|
|
|
|
|
|
| 302 |
)
|
| 303 |
|
| 304 |
# Add notification
|
| 305 |
st.balloons()
|
| 306 |
|
| 307 |
+
# Clean up progress display after 3 seconds
|
| 308 |
+
time.sleep(3)
|
| 309 |
+
progress_container.empty()
|
| 310 |
+
status_container.empty()
|
| 311 |
+
details_container.empty()
|
| 312 |
+
|
| 313 |
+
return True
|
| 314 |
|
| 315 |
except Exception as e:
|
| 316 |
+
st.error(f"β Error processing documents: {str(e)}")
|
| 317 |
+
if status_container:
|
| 318 |
+
status_container.error(traceback.format_exc())
|
| 319 |
# Reset states on error
|
| 320 |
st.session_state.vector_store = None
|
| 321 |
st.session_state.qa_system = None
|
| 322 |
st.session_state.chat_ready = False
|
| 323 |
+
return False
|
| 324 |
|
| 325 |
+
def process_document(file_path, chunk_size=1000, chunk_overlap=200):
    """
    Process a PDF document with proper chunking.

    Args:
        file_path (str): Path to the PDF file.
        chunk_size (int, optional): Maximum characters per chunk. Defaults to 1000.
        chunk_overlap (int, optional): Characters shared between consecutive
            chunks so context is preserved across chunk boundaries. Defaults to 200.

    Returns:
        tuple: (list of document chunks, full content of the document)
    """
    # Load PDF
    loader = PyPDFLoader(file_path)
    documents = loader.load()

    # Create text splitter; separators are tried in order, so splits prefer
    # paragraph breaks, then line breaks, then spaces, before falling back to
    # a hard character cut.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
        separators=["\n\n", "\n", " ", ""]
    )

    # Split documents into chunks
    chunks = text_splitter.split_documents(documents)

    # Extract full content for database storage
    full_content = "\n".join(doc.page_content for doc in documents)

    return chunks, full_content
|
| 354 |
|
| 355 |
|
| 356 |
def display_vector_store_info():
|