RFP_Analyzer_Agent_backup

Build error

App Files Files Community

cryogenic22 committed on Dec 2, 2024

Commit

61e7e62

verified ·

1 Parent(s): c9ae383

Update utils/database.py

Browse files

Files changed (1) hide show

utils/database.py +114 -0

utils/database.py CHANGED Viewed

@@ -1108,6 +1108,30 @@ def process_document(file_path):
     return chunks, full_content
 def display_vector_store_info():
     """
     Display information about the current vector store state.
@@ -1154,6 +1178,96 @@ def display_vector_store_info():
         st.error(traceback.format_exc())
 def initialize_qa_system(vector_store):
     """
     Initialize QA system with optimized retrieval.

     return chunks, full_content
+def delete_collection(conn: sqlite3.Connection, collection_id: int) -> bool:
+    """
+    Delete a collection and its document associations as one unit.
+
+    Rows in ``document_collections`` that reference the collection are
+    removed first, then the row in ``collections`` itself. Both deletes are
+    committed together; if either fails the transaction is rolled back so a
+    half-finished delete can never be committed later by unrelated code
+    sharing the same connection.
+
+    Args:
+        conn: Open SQLite connection to operate on.
+        collection_id: ID of the collection to delete.
+    Returns:
+        bool: True if both deletes were committed, False if a database
+        error occurred (the error is also reported via ``st.error``).
+        A non-existent ``collection_id`` deletes nothing and still
+        returns True.
+    """
+    # conn_lock is defined elsewhere in this module; presumably it
+    # serialises access to the shared connection -- verify against its
+    # definition. Error handling stays inside the lock so the rollback
+    # cannot interleave with another caller's statements.
+    with conn_lock:
+        try:
+            cursor = conn.cursor()
+            # Delete the collection's document associations first
+            cursor.execute('''
+                DELETE FROM document_collections
+                WHERE collection_id = ?
+            ''', (collection_id,))
+            # Then delete the collection itself
+            cursor.execute('''
+                DELETE FROM collections
+                WHERE id = ?
+            ''', (collection_id,))
+            conn.commit()
+            return True
+        except sqlite3.Error as e:
+            # Discard whatever part of the delete already ran.
+            try:
+                conn.rollback()
+            except sqlite3.Error:
+                # The connection itself is unusable; the original error
+                # reported below is the one worth surfacing.
+                pass
+            st.error(f"Error deleting collection: {e}")
+            return False
 def display_vector_store_info():
     """
     Display information about the current vector store state.
         st.error(traceback.format_exc())
+def process_and_store_document(uploaded_file) -> Optional[int]:
+    """
+    Process an uploaded PDF and store its text in the database.
+
+    The upload is written to a temporary ``.pdf`` file, loaded with
+    ``PyPDFLoader``, and the text of all pages (joined with newlines) is
+    inserted into the ``documents`` table together with the file name and
+    the current timestamp. Only the full text is persisted here; no
+    chunking is performed. The temporary file is removed before returning,
+    whether or not processing succeeded.
+
+    Args:
+        uploaded_file: Streamlit's UploadedFile object
+    Returns:
+        Optional[int]: The ID of the stored document if successful, None otherwise
+    """
+    import os
+    import traceback
+
+    # Track the path on its own so cleanup works even when creating or
+    # writing the temporary file is the step that fails.
+    tmp_path = None
+    try:
+        # Create a temporary file to store the uploaded content
+        with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp_file:
+            tmp_path = tmp_file.name
+            tmp_file.write(uploaded_file.getvalue())
+        # The handle is closed at this point, so the loader can reopen the
+        # file by name on every platform (Windows refuses a second open
+        # while the NamedTemporaryFile handle is still held).
+        documents = PyPDFLoader(tmp_path).load()
+        # Extract full content for database storage
+        full_content = "\n".join(doc.page_content for doc in documents)
+        # Store in database
+        # NOTE(review): assumes st.session_state.db_conn is a sqlite3
+        # connection, whose context manager commits or rolls back -- confirm.
+        with st.session_state.db_conn as conn:
+            cursor = conn.cursor()
+            # Insert document
+            cursor.execute('''
+                    INSERT INTO documents (name, content, upload_date)
+                    VALUES (?, ?, ?)
+                ''', (uploaded_file.name, full_content, datetime.now()))
+            # Get the document ID
+            document_id = cursor.lastrowid
+            conn.commit()
+        return document_id
+    except Exception as e:
+        # UI boundary: report the failure to the user instead of crashing.
+        st.error(f"Error processing document {uploaded_file.name}: {str(e)}")
+        st.error(traceback.format_exc())
+        return None
+    finally:
+        # Clean up temporary file (best effort: a leftover temp file must
+        # not turn a successful upload into a failure).
+        if tmp_path is not None:
+            try:
+                os.unlink(tmp_path)
+            except OSError:
+                pass
+def get_document_content(conn: sqlite3.Connection, document_id: int) -> Optional[str]:
+    """
+    Look up the stored text of a single document by its ID.
+
+    Args:
+        conn: Database connection
+        document_id: ID of the document whose ``content`` column is wanted
+    Returns:
+        Optional[str]: The ``content`` value of the matching ``documents``
+        row, or None when no row matches or a database error occurs (the
+        error is reported via ``st.error``)
+    """
+    try:
+        row = conn.cursor().execute('''
+            SELECT content
+            FROM documents
+            WHERE id = ?
+        ''', (document_id,)).fetchone()
+    except sqlite3.Error as e:
+        st.error(f"Error retrieving document content: {e}")
+        return None
+    # fetchone() yields None when the ID is unknown.
+    if not row:
+        return None
+    return row[0]
 def initialize_qa_system(vector_store):
     """
     Initialize QA system with optimized retrieval.