Spaces:

trustlogic
/

Copy-AI

Sleeping

App Files Community

Wajahat698 committed on Dec 4, 2024

Commit

5660872

verified ·

1 Parent(s): 570d233

Update app.py

Browse files

Files changed (1) hide show

app.py +137 -77

app.py CHANGED Viewed

@@ -102,61 +102,92 @@ storage = firebase.storage()
 backend_url = "https://backend-web-05122eab4e09.herokuapp.com"
-def convert_pdf_to_md(file):
     """
-    Convert a PDF file to Markdown.
     """
     try:
-        text = extract_text(file)
-        return f"# PDF Document\n\n{text}"
     except Exception as e:
-        logger.error(f"Error converting PDF to MD: {e}")
-        return ""
-def convert_docx_to_md(file):
     """
-    Extract text from a .docx file and return as a single string.
     """
     try:
-        # Read the file
-        doc =  docx.Document(file)
-        # Extract all text
         text = "\n".join([paragraph.text for paragraph in doc.paragraphs])
-        if not text.strip():  # Handle empty content
-            raise ValueError("The document has no content.")
-        return text
     except Exception as e:
-        raise ValueError(f"Error reading .docx file: {e}")
-def convert_txt_to_md(file):
     """
-    Convert a TXT file to Markdown.
     """
     try:
         text = file.read().decode("utf-8")
-        return f"# Text Document\n\n{text}"
     except Exception as e:
-        logger.error(f"Error converting TXT to MD: {e}")
-        return ""
-def display_save_confirmation(type_saved):
     """
-    Display a confirmation message when content is saved.
     """
-    st.info(f"Content successfully saved as **{type_saved}**!")
-def convert_file_to_md(file):
     """
-    Detect file type and convert to Markdown accordingly.
     """
-    if file.type == "application/pdf":
-        return convert_pdf_to_md(file)
-    elif file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
-        return convert_docx_to_md(file)
-    elif file.type == "text/plain":
-        return convert_txt_to_md(file)
-    else:
-        st.sidebar.warning(f"Unsupported file type: {file.type}")
-        return ""
 def merge_markdown_contents(contents):
     """
@@ -170,7 +201,7 @@ def upload_to_firebase(user_id, file):
     """
     Upload document to Firebase, extract content, and add it to the knowledge base.
     """
-    content = convert_file_to_md(file)  # Ensure this function extracts content correctly
     if not content:
         return None, "Failed to extract content from the file."
@@ -200,7 +231,7 @@ def index_document_content(doc_content, doc_id):
     """
     Indexes the document content by splitting it into chunks and creating embeddings.
     """
-    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
     texts = text_splitter.split_text(doc_content)
     # Create embeddings for each chunk
@@ -758,7 +789,6 @@ def side():
         # Fetch documents from Firebase
         if "documents" not in st.session_state:
             try:
                 docs = db.child("users").child(st.session_state["wix_user_id"]).child("KnowledgeBase").get().val()
@@ -785,41 +815,64 @@ def side():
         )
         # File uploader
-        uploaded_file = st.file_uploader("", type=["pdf", "docx", "txt"], key="file_uploader", label_visibility="collapsed")
-        if st.sidebar.button("Upload", key="upload_button"):
-            if uploaded_file:
                 try:
-                    content, _= upload_to_firebase(st.session_state["wix_user_id"], uploaded_file)
-                    st.rerun()
                 except Exception as e:
-                    st.sidebar.error(f"Error uploading document: {e}")
-            else:
-                st.sidebar.warning("Please select a file to upload.")
         # Display and delete functionality for documents
-        if st.session_state["documents"]:
-            # Select a document to view or delete
-            selected_doc_id = st.selectbox(
-                "Select document to view or delete",
-                options=list(st.session_state["documents"].keys()),
-                format_func=lambda x: st.session_state["documents"][x].get("name", f"Document {x}"),
-                key="select_doc"
             )
-            # Button to delete the selected document
-            if st.sidebar.button("Delete ", key="delete_button"):
-                try:
-                    # Remove the document from Firebase
-                    db.child("users").child(st.session_state["wix_user_id"]).child("KnowledgeBase").child(selected_doc_id).remove()
-                    # Remove the document from session state
-                    fetch_documents()
-                    st.sidebar.success("Document deleted successfully!")
                 except Exception as e:
-                    st.sidebar.error(f"Error deleting document: {e}")
         st.sidebar.markdown("</div>", unsafe_allow_html=True)
         trust_buckets = ["Any","Stability", "Development", "Relationship", "Benefit", "Vision", "Competence"]
@@ -1074,31 +1127,37 @@ def google_search(query):
         return ["Error occurred during Google search"]
-def rag_response(query):
     """
-    Handle queries by searching both static and dynamically uploaded knowledge bases.
     """
     try:
-        # Initialize results list
         results = []
-        # Search FAISS database (static knowledge base)
         if "faiss_db" in st.session_state:
             retrieved_docs = search_knowledge_base(query)
             results.extend(retrieved_docs)
-        # Search vector stores (dynamic knowledge base)
-        if "vector_store" in st.session_state:
-            for vector_store in st.session_state["vector_store"].values():
-                vector_store_results = vector_store.similarity_search(query, k=3)  # Adjust `k` as needed
-                results.extend(vector_store_results)
         # Combine results into a single context
         context = "\n".join([doc.page_content for doc in results])
-        if not context.strip():
-            return "No relevant information found in the knowledge base."
         # Generate AI response with the retrieved context
         prompt = f"""
         Context:
@@ -1115,6 +1174,7 @@ def rag_response(query):
         llm = ChatOpenAI(model="gpt-4", temperature=0.2, api_key=openai_api_key)
         response = llm.invoke(prompt)
         return response.content.strip()
     except Exception as e:
         logger.error(f"Error generating RAG response: {e}")

 backend_url = "https://backend-web-05122eab4e09.herokuapp.com"
+def convert_file_to_txt(file):
+    """
+    Convert different file types to plain text.
+    """
+    if file.type == "application/pdf":
+        return convert_pdf_to_txt(file)
+    elif file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
+        return convert_docx_to_txt(file)
+    elif file.type == "text/plain":
+        return convert_txt_to_txt(file)
+    elif file.type == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet":
+        return convert_excel_to_txt(file)
+    elif file.type == "text/csv":
+        return convert_csv_to_txt(file)
+    else:
+        st.sidebar.warning(f"Unsupported file type: {file.type}")
+        return None
+def convert_pdf_to_txt(file):
     """
+    Convert a PDF file to plain text.
     """
     try:
+        text = extract_text(file)  # Use PyPDF2 or pdfplumber for better accuracy if needed
+        return text.strip()
     except Exception as e:
+        st.sidebar.error(f"Error converting PDF to TXT: {e}")
+        return None
+def convert_docx_to_txt(file):
     """
+    Extract text from a .docx file.
     """
     try:
+        doc = docx.Document(file)
         text = "\n".join([paragraph.text for paragraph in doc.paragraphs])
+        return text.strip()
     except Exception as e:
+        st.sidebar.error(f"Error converting DOCX to TXT: {e}")
+        return None
+def convert_txt_to_txt(file):
     """
+    Handle plain text file as is.
     """
     try:
         text = file.read().decode("utf-8")
+        return text.strip()
     except Exception as e:
+        st.sidebar.error(f"Error reading TXT file: {e}")
+        return None
+def convert_excel_to_txt(file):
     """
+    Convert an Excel file to plain text.
     """
+    try:
+        df = pd.read_excel(file)
+        text = df.to_string(index=False)
+        return text.strip()
+    except Exception as e:
+        st.sidebar.error(f"Error converting Excel to TXT: {e}")
+        return None
+def convert_csv_to_txt(file):
     """
+    Convert a CSV file to plain text.
     """
+    try:
+        df = pd.read_csv(file)
+        text = df.to_string(index=False)
+        return text.strip()
+    except Exception as e:
+        st.sidebar.error(f"Error converting CSV to TXT: {e}")
+        return None
 def merge_markdown_contents(contents):
     """
     """
     Upload document to Firebase, extract content, and add it to the knowledge base.
     """
+    content = convert_file_to_txt(file)  # Ensure this function extracts content correctly
     if not content:
         return None, "Failed to extract content from the file."
     """
     Indexes the document content by splitting it into chunks and creating embeddings.
     """
+    text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=500)
     texts = text_splitter.split_text(doc_content)
     # Create embeddings for each chunk
         # Fetch documents from Firebase
         if "documents" not in st.session_state:
             try:
                 docs = db.child("users").child(st.session_state["wix_user_id"]).child("KnowledgeBase").get().val()
         )
         # File uploader
+        uploaded_files = st.file_uploader(
+            "",
+            type=["pdf", "docx", "txt"],
+            accept_multiple_files=True,
+            key="file_uploader"
+        )
+        if uploaded_files:
+            for uploaded_file in uploaded_files:
                 try:
+                    upload_to_firebase(st.session_state["wix_user_id"], uploaded_file)
+                    st.sidebar.success(f"File '{uploaded_file.name}' uploaded and converted to TXT!")
                 except Exception as e:
+                    st.sidebar.error(f"Error processing file '{uploaded_file.name}': {e}")
         # Display and delete functionality for documents
+        if st.session_state.get("documents"):
+            doc_ids = list(st.session_state["documents"].keys())
+            doc_options = ["None (use only main knowledge base)"] + doc_ids
+            selected_options = st.multiselect(
+                "Select documents to include in your query:",
+                options=doc_options,
+                default="None (use only main knowledge base)",
+                format_func=lambda x: st.session_state["documents"][x].get("name", f"Document {x}") if x != "None (use only main knowledge base)" else x,
+                key="select_docs"
             )
+            selected_doc_ids = [doc_id for doc_id in selected_options if doc_id != "None (use only main knowledge base)"]
+            st.session_state['selected_doc_ids'] = selected_doc_ids
+            if selected_doc_ids:
+                selected_doc_names = [st.session_state['documents'][doc_id]['name'] for doc_id in selected_doc_ids]
+                st.info(f"Selected Documents: {', '.join(selected_doc_names)}")
+            else:
+                st.sidebar.info("Using only the main knowledge base.")
+        else:
+            selected_doc_ids = []
+        # Button to delete the selected documents
+        if selected_doc_ids:
+            if st.button("Delete Selected Documents", key="delete_button"):
+                try:
+                    for doc_id in selected_doc_ids:
+                        # Remove the document from Firebase
+                        db.child("users").child(st.session_state["wix_user_id"]).child("KnowledgeBase").child(doc_id).remove()
+                        # Remove from session state
+                        st.session_state["vector_store"].pop(doc_id, None)
+                        st.session_state["documents"].pop(doc_id, None)
+                    st.success("Selected documents deleted successfully!")
+                    st.rerun()
                 except Exception as e:
+                    st.error(f"Error deleting documents: {e}")
         st.sidebar.markdown("</div>", unsafe_allow_html=True)
         trust_buckets = ["Any","Stability", "Development", "Relationship", "Benefit", "Vision", "Competence"]
         return ["Error occurred during Google search"]
+def rag_response(query, selected_doc_ids=None):
     """
+    Handle queries by searching both the main knowledge base and the selected documents.
     """
     try:
         results = []
+        # Search FAISS database (main knowledge base)
         if "faiss_db" in st.session_state:
             retrieved_docs = search_knowledge_base(query)
             results.extend(retrieved_docs)
+        # If selected_doc_ids is None, try to get it from session state
+        if selected_doc_ids is None:
+            selected_doc_ids = st.session_state.get('selected_doc_ids', [])
+        # Search vector stores of the selected documents
+        if selected_doc_ids:
+            for doc_id in selected_doc_ids:
+                vector_store = st.session_state.get("vector_store", {}).get(doc_id)
+                if vector_store:
+                    vector_store_results = vector_store.similarity_search(query, k=5)
+                    results.extend(vector_store_results)
+                else:
+                    st.warning(f"Vector store for document '{st.session_state['documents'][doc_id]['name']}' not found.")
         # Combine results into a single context
         context = "\n".join([doc.page_content for doc in results])
+        if not context.strip():
+            return "No relevant information found in the knowledge bases."
         # Generate AI response with the retrieved context
         prompt = f"""
         Context:
         llm = ChatOpenAI(model="gpt-4", temperature=0.2, api_key=openai_api_key)
         response = llm.invoke(prompt)
         return response.content.strip()
     except Exception as e:
         logger.error(f"Error generating RAG response: {e}")