Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -13,6 +13,8 @@ from langchain.agents.format_scratchpad.openai_tools import format_to_openai_too
|
|
| 13 |
from langchain_core.messages import AIMessage, HumanMessage
|
| 14 |
from langchain_community.document_loaders import TextLoader
|
| 15 |
from langchain_text_splitters import CharacterTextSplitter
|
|
|
|
|
|
|
| 16 |
import serpapi
|
| 17 |
import requests
|
| 18 |
import streamlit.components.v1 as components
|
|
@@ -106,15 +108,18 @@ def convert_pdf_to_md(file):
|
|
| 106 |
|
| 107 |
def convert_docx_to_md(file):
|
| 108 |
"""
|
| 109 |
-
|
| 110 |
"""
|
| 111 |
try:
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 115 |
except Exception as e:
|
| 116 |
-
|
| 117 |
-
return ""
|
| 118 |
|
| 119 |
def convert_txt_to_md(file):
|
| 120 |
"""
|
|
@@ -154,35 +159,47 @@ def merge_markdown_contents(contents):
|
|
| 154 |
return merged_content
|
| 155 |
|
| 156 |
def upload_to_firebase(user_id, file):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 157 |
"""
|
| 158 |
-
|
| 159 |
"""
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
|
| 167 |
-
|
| 168 |
-
|
|
|
|
|
|
|
| 169 |
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
public_url = storage.child(filename).get_url(None)
|
| 175 |
-
doc_id = str(uuid.uuid4())
|
| 176 |
-
db.child("users").child(user_id).child("documents").child(doc_id).set({
|
| 177 |
-
"name": file.name,
|
| 178 |
-
"content_url": public_url # Link to the file in Firebase Storage
|
| 179 |
-
})
|
| 180 |
-
|
| 181 |
-
st.success(f"File '{file.name}' uploaded successfully!")
|
| 182 |
-
return content,public_url
|
| 183 |
-
except Exception as e:
|
| 184 |
-
st.error(f"Error uploading to Firebase: {e}")
|
| 185 |
-
return None
|
| 186 |
|
| 187 |
|
| 188 |
def fetch_trustbuilders(user_id):
|
|
@@ -407,46 +424,39 @@ def load_main_data_source():
|
|
| 407 |
st.error(f"Error loading main data source: {e}")
|
| 408 |
return []
|
| 409 |
|
| 410 |
-
def combine_data_sources():
|
| 411 |
-
main_data_source = load_main_data_source()
|
| 412 |
-
|
| 413 |
-
|
| 414 |
-
trustbuilders = [Document(page_content=tb["message"]) for tb in st.session_state.get("trustbuilders", {}).values()]
|
| 415 |
-
brand_tonalities = [Document(page_content=bt["message"]) for bt in st.session_state.get("brand_tonality", {}).values()]
|
| 416 |
-
|
| 417 |
-
return main_data_source + user_data_source + trustbuilders + brand_tonalities
|
| 418 |
-
|
| 419 |
|
| 420 |
-
def refresh_faiss_index():
|
| 421 |
-
combined_sources = combine_data_sources()
|
| 422 |
-
if combined_sources:
|
| 423 |
-
embeddings = OpenAIEmbeddings()
|
| 424 |
-
db_faiss = FAISS.from_documents(combined_sources, embeddings)
|
| 425 |
-
st.session_state["faiss_db"] = db_faiss
|
| 426 |
-
|
| 427 |
-
def load_user_data_source(user_id):
|
| 428 |
"""
|
| 429 |
-
|
|
|
|
| 430 |
"""
|
| 431 |
-
|
| 432 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 433 |
|
| 434 |
-
if not docs:
|
| 435 |
-
|
| 436 |
-
return user_documents
|
| 437 |
|
| 438 |
-
|
| 439 |
-
|
| 440 |
-
|
| 441 |
-
|
| 442 |
-
|
| 443 |
-
|
| 444 |
-
|
| 445 |
-
|
| 446 |
-
|
| 447 |
-
|
| 448 |
-
|
| 449 |
-
return user_documents
|
| 450 |
|
| 451 |
|
| 452 |
def update_message_counter():
|
|
@@ -545,13 +555,14 @@ def download_link(content, filename):
|
|
| 545 |
|
| 546 |
|
| 547 |
|
| 548 |
-
|
|
|
|
| 549 |
try:
|
| 550 |
-
docs = db.child("users").child(
|
| 551 |
-
|
| 552 |
except Exception as e:
|
| 553 |
-
st.error(f"Error fetching documents: {e}")
|
| 554 |
-
|
| 555 |
|
| 556 |
|
| 557 |
|
|
@@ -757,17 +768,18 @@ def side():
|
|
| 757 |
st.sidebar.error(f"Error fetching documents: {e}")
|
| 758 |
st.session_state["documents"] = {}
|
| 759 |
|
| 760 |
-
|
| 761 |
-
|
| 762 |
-
|
| 763 |
-
|
| 764 |
-
|
| 765 |
-
|
| 766 |
-
|
| 767 |
-
|
|
|
|
| 768 |
st.text_area(
|
| 769 |
label="",
|
| 770 |
-
value=
|
| 771 |
height=150,
|
| 772 |
key="saved_documents_text_area",
|
| 773 |
disabled=True
|
|
@@ -785,7 +797,6 @@ def side():
|
|
| 785 |
{"content": content, "name": uploaded_file.name}
|
| 786 |
)
|
| 787 |
st.session_state["documents"][doc_id] = {"content": content, "name": uploaded_file.name}
|
| 788 |
-
refresh_faiss_index()
|
| 789 |
st.sidebar.success(f"Document '{uploaded_file.name}' uploaded successfully!")
|
| 790 |
st.rerun()
|
| 791 |
else:
|
|
@@ -794,25 +805,30 @@ def side():
|
|
| 794 |
st.sidebar.error(f"Error uploading document: {e}")
|
| 795 |
else:
|
| 796 |
st.sidebar.warning("Please select a file to upload.")
|
| 797 |
-
|
| 798 |
-
# Delete Button
|
| 799 |
if st.session_state["documents"]:
|
| 800 |
-
|
| 801 |
-
|
|
|
|
| 802 |
options=list(st.session_state["documents"].keys()),
|
| 803 |
format_func=lambda x: st.session_state["documents"][x].get("name", f"Document {x}"),
|
| 804 |
-
key="
|
| 805 |
)
|
| 806 |
-
|
|
|
|
|
|
|
| 807 |
try:
|
| 808 |
-
|
| 809 |
-
st.session_state["
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 810 |
st.sidebar.success("Document deleted successfully!")
|
| 811 |
-
st.rerun()
|
| 812 |
except Exception as e:
|
| 813 |
st.sidebar.error(f"Error deleting document: {e}")
|
| 814 |
-
|
| 815 |
-
|
| 816 |
|
| 817 |
st.sidebar.markdown("</div>", unsafe_allow_html=True)
|
| 818 |
trust_buckets = ["Any","Stability", "Development", "Relationship", "Benefit", "Vision", "Competence"]
|
|
@@ -1035,26 +1051,33 @@ def google_search(query):
|
|
| 1035 |
|
| 1036 |
|
| 1037 |
|
| 1038 |
-
# RAG response function
|
| 1039 |
def rag_response(query):
|
| 1040 |
try:
|
| 1041 |
-
#
|
| 1042 |
-
|
| 1043 |
-
|
| 1044 |
-
|
| 1045 |
-
|
| 1046 |
-
|
| 1047 |
-
llm = ChatOpenAI(model="gpt-4o", temperature=0.5, api_key=openai_api_key)
|
| 1048 |
-
response = llm.invoke(prompt)
|
| 1049 |
-
|
| 1050 |
-
# Replace terms in the final output as per your restrictions
|
| 1051 |
-
response_content = response.content
|
| 1052 |
|
| 1053 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1054 |
except Exception as e:
|
| 1055 |
logger.error(f"Error generating RAG response: {e}")
|
| 1056 |
return "Error occurred during RAG response generation"
|
| 1057 |
|
|
|
|
| 1058 |
# Define tools
|
| 1059 |
@tool
|
| 1060 |
def knowledge_base_tool(query: str):
|
|
@@ -1308,7 +1331,15 @@ prompt_template = ChatPromptTemplate.from_messages([
|
|
| 1308 |
])
|
| 1309 |
|
| 1310 |
# Create Langchain Agent
|
| 1311 |
-
llm = ChatOpenAI(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1312 |
llm_with_tools = llm.bind_tools(tools)
|
| 1313 |
|
| 1314 |
# Define the agent pipeline
|
|
@@ -1626,13 +1657,21 @@ def handle_memory_queries(prompt):
|
|
| 1626 |
"""
|
| 1627 |
prompt = prompt.lower().strip()
|
| 1628 |
|
| 1629 |
-
|
| 1630 |
-
|
| 1631 |
-
|
| 1632 |
-
|
| 1633 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1634 |
else:
|
| 1635 |
-
assistant_response = "
|
| 1636 |
|
| 1637 |
# Save response to chat history and display it
|
| 1638 |
st.session_state.chat_history.append({"role": "assistant", "content": assistant_response})
|
|
@@ -1640,6 +1679,41 @@ def handle_memory_queries(prompt):
|
|
| 1640 |
st.markdown(assistant_response)
|
| 1641 |
return None
|
| 1642 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1643 |
# Show saved TrustBuilders
|
| 1644 |
elif "find my saved trustbuilders" in prompt or "show my saved trustbuilders" in prompt:
|
| 1645 |
trustbuilders = fetch_trustbuilders(st.session_state.get("wix_user_id", "default_user"))
|
|
@@ -1752,9 +1826,90 @@ def handle_save_trustbuilder(content, specified_bucket=None):
|
|
| 1752 |
|
| 1753 |
|
| 1754 |
|
| 1755 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1756 |
|
| 1757 |
-
# Function to update the message counter in a static location
|
| 1758 |
|
| 1759 |
if "email" not in st.session_state:
|
| 1760 |
st.session_state["email"] = f"demo_user_{st.session_state['wix_user_id']}@example.com"
|
|
|
|
| 13 |
from langchain_core.messages import AIMessage, HumanMessage
|
| 14 |
from langchain_community.document_loaders import TextLoader
|
| 15 |
from langchain_text_splitters import CharacterTextSplitter
|
| 16 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
| 17 |
+
|
| 18 |
import serpapi
|
| 19 |
import requests
|
| 20 |
import streamlit.components.v1 as components
|
|
|
|
| 108 |
|
| 109 |
def convert_docx_to_md(file):
    """
    Extract text from a .docx file and return it as a single string.

    Args:
        file: The uploaded .docx file object (as handed over by the uploader).

    Returns:
        str: All paragraph texts joined with newlines.

    Raises:
        ValueError: If the document has no content or cannot be read.
    """
    try:
        # Read the file. Must use the `file` parameter — the original body
        # referenced the undefined global `uploaded_file`, which raised
        # NameError on every call.
        doc = DocxDocument(file)
        # Extract all paragraph text
        text = "\n".join(paragraph.text for paragraph in doc.paragraphs)
        if not text.strip():  # Handle empty content
            raise ValueError("The document has no content.")
        return text
    except Exception as e:
        # Wrap any failure (including the empty-content case above) in a
        # uniform ValueError for the caller.
        raise ValueError(f"Error reading .docx file: {e}")
|
|
|
|
| 123 |
|
| 124 |
def convert_txt_to_md(file):
|
| 125 |
"""
|
|
|
|
| 159 |
return merged_content
|
| 160 |
|
| 161 |
def upload_to_firebase(user_id, file):
    """
    Convert an uploaded file to markdown text, persist it under the user's
    Firebase KnowledgeBase node, mirror it into session state, and index it
    for retrieval.

    Args:
        user_id: Firebase key of the current user.
        file: Uploaded file object (must expose `.name`).

    Returns:
        tuple: (content, None) on success, or (None, error_message) if the
        file could not be converted.
    """
    content = convert_file_to_md(file)
    if not content:
        # Conversion failed or produced empty text — nothing to store.
        return None, "Failed to convert file to content."

    # Fresh random key so repeated uploads of the same file don't collide.
    doc_id = str(uuid.uuid4())
    document_data = {"content": content, "name": file.name}

    # Save to Firebase
    db.child("users").child(user_id).child("KnowledgeBase").child(doc_id).set(document_data)

    # Update session state
    if "documents" not in st.session_state:
        st.session_state["documents"] = {}
    st.session_state["documents"][doc_id] = document_data

    # Index the document content
    index_document_content(content, doc_id)

    st.sidebar.success(f"Document '{file.name}' uploaded successfully!")
    return content,None
|
| 182 |
+
|
| 183 |
+
def index_document_content(doc_content, doc_id):
    """
    Split a document's text into overlapping chunks, embed them, and keep a
    per-document FAISS vector store in the Streamlit session state.
    """
    # Chunk the raw text; the overlap keeps context across chunk boundaries.
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=50,
    )
    chunks = splitter.split_text(doc_content)

    # One metadata record per chunk so search hits can be traced back to
    # their source document.
    embedder = OpenAIEmbeddings(openai_api_key=openai_api_key)
    chunk_meta = [{"doc_id": doc_id, "chunk_id": idx} for idx, _ in enumerate(chunks)]
    store = FAISS.from_texts(chunks, embedder, metadatas=chunk_meta)

    # Register the store under this document's id in session state.
    if "vector_store" not in st.session_state:
        st.session_state["vector_store"] = {}
    st.session_state["vector_store"][doc_id] = store
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 203 |
|
| 204 |
|
| 205 |
def fetch_trustbuilders(user_id):
|
|
|
|
| 424 |
st.error(f"Error loading main data source: {e}")
|
| 425 |
return []
|
| 426 |
|
| 427 |
+
def combine_data_sources(include_main=True):
    """
    Build the combined retrieval corpus: the shared main data source
    (optionally skipped) followed by the current user's uploaded documents.
    """
    if include_main:
        corpus = load_main_data_source()
    else:
        corpus = []
    corpus = corpus + load_user_data_source(st.session_state["wix_user_id"])
    return corpus
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 431 |
|
| 432 |
+
def refresh_faiss_index(documents=None):
    """
    Refresh the FAISS index with updated documents.

    Args:
        documents: Optional list of langchain `Document`s. When given (and an
            index already exists) they are appended to the current index;
            otherwise the whole index is rebuilt from the combined sources.
    """
    if documents and "faiss_db" in st.session_state:
        # Incremental update. `add_documents` embeds with the function the
        # store was built with; the original passed an embeddings object as a
        # second positional argument, which `add_documents` does not accept
        # (TypeError). The "faiss_db" guard also avoids a KeyError when no
        # index has been built yet — we fall through to a full rebuild instead.
        st.session_state["faiss_db"].add_documents(documents)
    else:
        # Rebuild the entire index
        combined_sources = combine_data_sources()
        if combined_sources:
            embeddings = OpenAIEmbeddings()
            st.session_state["faiss_db"] = FAISS.from_documents(combined_sources, embeddings)
|
| 447 |
|
|
|
|
|
|
|
|
|
|
| 448 |
|
| 449 |
+
|
| 450 |
+
def load_user_data_source(user_id):
    """
    Fetch the user's uploaded documents from Firebase and wrap each record
    in a langchain `Document`.

    Returns an empty list when nothing is stored or when the fetch fails
    (the error is surfaced via `st.error`).
    """
    try:
        records = db.child("users").child(user_id).child("KnowledgeBase").get().val()
        if not records:
            return []
        # Malformed records (missing "content") are caught below with the
        # same user-facing error path as a failed fetch.
        return [Document(page_content=record["content"]) for record in records.values()]
    except Exception as e:
        st.error(f"Error loading user data source: {e}")
        return []
|
|
|
|
| 460 |
|
| 461 |
|
| 462 |
def update_message_counter():
|
|
|
|
| 555 |
|
| 556 |
|
| 557 |
|
| 558 |
+
|
| 559 |
+
def fetch_documents():
    """
    Load the current user's saved documents from Firebase into
    st.session_state["documents"], falling back to an empty dict when the
    node is missing or the fetch fails (error shown in the sidebar).
    """
    try:
        docs = db.child("users").child(st.session_state["wix_user_id"]).child("KnowledgeBase").get().val()
        # Firebase returns None for an empty node — normalize to {}.
        st.session_state["documents"] = docs if docs else {}
    except Exception as e:
        st.sidebar.error(f"Error fetching documents: {e}")
        st.session_state["documents"] = {}
|
| 566 |
|
| 567 |
|
| 568 |
|
|
|
|
| 768 |
st.sidebar.error(f"Error fetching documents: {e}")
|
| 769 |
st.session_state["documents"] = {}
|
| 770 |
|
| 771 |
+
def update_saved_docs_content():
|
| 772 |
+
return "\n\n---\n\n".join(
|
| 773 |
+
[
|
| 774 |
+
f"**{doc_data.get('name', f'Document {doc_id[:8]}')}**\n{doc_data.get('content', 'No content available')}"
|
| 775 |
+
for doc_id, doc_data in st.session_state["documents"].items()
|
| 776 |
+
]
|
| 777 |
+
) if st.session_state["documents"] else "Save documents like your brand tonality, key phrases, or segments here and they will show here."
|
| 778 |
+
saved_docs_content = update_saved_docs_content()
|
| 779 |
+
|
| 780 |
st.text_area(
|
| 781 |
label="",
|
| 782 |
+
value=saved_docs_content,
|
| 783 |
height=150,
|
| 784 |
key="saved_documents_text_area",
|
| 785 |
disabled=True
|
|
|
|
| 797 |
{"content": content, "name": uploaded_file.name}
|
| 798 |
)
|
| 799 |
st.session_state["documents"][doc_id] = {"content": content, "name": uploaded_file.name}
|
|
|
|
| 800 |
st.sidebar.success(f"Document '{uploaded_file.name}' uploaded successfully!")
|
| 801 |
st.rerun()
|
| 802 |
else:
|
|
|
|
| 805 |
st.sidebar.error(f"Error uploading document: {e}")
|
| 806 |
else:
|
| 807 |
st.sidebar.warning("Please select a file to upload.")
|
| 808 |
+
# Display and delete functionality for documents
|
|
|
|
| 809 |
if st.session_state["documents"]:
|
| 810 |
+
# Select a document to view or delete
|
| 811 |
+
selected_doc_id = st.selectbox(
|
| 812 |
+
"Select document to view or delete",
|
| 813 |
options=list(st.session_state["documents"].keys()),
|
| 814 |
format_func=lambda x: st.session_state["documents"][x].get("name", f"Document {x}"),
|
| 815 |
+
key="select_doc"
|
| 816 |
)
|
| 817 |
+
|
| 818 |
+
# Button to delete the selected document
|
| 819 |
+
if st.sidebar.button("Delete ", key="delete_button"):
|
| 820 |
try:
|
| 821 |
+
# Remove the document from Firebase
|
| 822 |
+
db.child("users").child(st.session_state["wix_user_id"]).child("KnowledgeBase").child(selected_doc_id).remove()
|
| 823 |
+
|
| 824 |
+
# Remove the document from session state
|
| 825 |
+
fetch_documents()
|
| 826 |
+
|
| 827 |
+
|
| 828 |
+
|
| 829 |
st.sidebar.success("Document deleted successfully!")
|
|
|
|
| 830 |
except Exception as e:
|
| 831 |
st.sidebar.error(f"Error deleting document: {e}")
|
|
|
|
|
|
|
| 832 |
|
| 833 |
st.sidebar.markdown("</div>", unsafe_allow_html=True)
|
| 834 |
trust_buckets = ["Any","Stability", "Development", "Relationship", "Benefit", "Vision", "Competence"]
|
|
|
|
| 1051 |
|
| 1052 |
|
| 1053 |
|
|
|
|
| 1054 |
def rag_response(query):
|
| 1055 |
try:
|
| 1056 |
+
# Check if the query references uploaded documents
|
| 1057 |
+
if "using uploaded document" in query.lower():
|
| 1058 |
+
document_response = handle_document_query(query) # Use your existing `handle_document_query` function
|
| 1059 |
+
if document_response:
|
| 1060 |
+
return document_response
|
| 1061 |
+
else:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1062 |
|
| 1063 |
+
# Proceed with the existing knowledge base logic if no uploaded document context is specified
|
| 1064 |
+
retrieved_docs = search_knowledge_base(query)
|
| 1065 |
+
context = "\n".join(doc.page_content for doc in retrieved_docs)
|
| 1066 |
+
|
| 1067 |
+
# Prepare the prompt with the retrieved context
|
| 1068 |
+
prompt = f"Context:\n{context}\n\nQuestion: {query}\nAnswer:"
|
| 1069 |
+
llm = ChatOpenAI(model="gpt-4o", temperature=0.5, api_key=openai_api_key)
|
| 1070 |
+
response = llm.invoke(prompt)
|
| 1071 |
+
|
| 1072 |
+
# Replace terms in the final output as per your restrictions
|
| 1073 |
+
response_content = response.content
|
| 1074 |
+
|
| 1075 |
+
return response_content
|
| 1076 |
except Exception as e:
|
| 1077 |
logger.error(f"Error generating RAG response: {e}")
|
| 1078 |
return "Error occurred during RAG response generation"
|
| 1079 |
|
| 1080 |
+
|
| 1081 |
# Define tools
|
| 1082 |
@tool
|
| 1083 |
def knowledge_base_tool(query: str):
|
|
|
|
| 1331 |
])
|
| 1332 |
|
| 1333 |
# Create Langchain Agent
|
| 1334 |
+
# Shared chat model for the agent; tools are bound to it just below.
llm = ChatOpenAI(
    model="gpt-4o",
    temperature=0.7, # Balanced creativity and adherence
    max_tokens=2000, # Ensure sufficient output length
    top_p=0.85, # Focused outputs
    frequency_penalty=0.1, # Minimize repetition
    presence_penalty=0.7 # Moderate novelty to maintain adherence
)
|
| 1342 |
+
|
| 1343 |
llm_with_tools = llm.bind_tools(tools)
|
| 1344 |
|
| 1345 |
# Define the agent pipeline
|
|
|
|
| 1657 |
"""
|
| 1658 |
prompt = prompt.lower().strip()
|
| 1659 |
|
| 1660 |
+
valid_buckets = ["Stability", "Development", "Relationship", "Benefit", "Vision", "Competence"]
|
| 1661 |
+
|
| 1662 |
+
# Case 1: Save this as [bucket] trust builder: [content]
|
| 1663 |
+
match_save_this_specific = re.search(r"\bsave\s+(this\s+)?as\s+(\w+)\s+trust\s+builders?\s*:\s*(.+)", prompt, re.IGNORECASE)
|
| 1664 |
+
if match_save_this_specific:
|
| 1665 |
+
specified_bucket = match_save_this_specific.group(2).capitalize()
|
| 1666 |
+
content_to_save = match_save_this_specific.group(3).strip()
|
| 1667 |
+
|
| 1668 |
+
if specified_bucket in valid_buckets:
|
| 1669 |
+
if content_to_save:
|
| 1670 |
+
assistant_response = handle_save_trustbuilder(content_to_save, specified_bucket)
|
| 1671 |
+
else:
|
| 1672 |
+
assistant_response = "No content provided. Please include content after 'save this as [bucket] trust builder:'."
|
| 1673 |
else:
|
| 1674 |
+
assistant_response = f"Invalid Trust Bucket '{specified_bucket}'. Valid buckets are: {', '.join(valid_buckets)}."
|
| 1675 |
|
| 1676 |
# Save response to chat history and display it
|
| 1677 |
st.session_state.chat_history.append({"role": "assistant", "content": assistant_response})
|
|
|
|
| 1679 |
st.markdown(assistant_response)
|
| 1680 |
return None
|
| 1681 |
|
| 1682 |
+
# Case 2: Save this under [bucket]: [content]
|
| 1683 |
+
match_save_under_specific = re.search(r"\bsave\s+(this\s+)?under\s+(\w+)\s*:\s*(.+)", prompt, re.IGNORECASE)
|
| 1684 |
+
if match_save_under_specific:
|
| 1685 |
+
specified_bucket = match_save_under_specific.group(2).capitalize()
|
| 1686 |
+
content_to_save = match_save_under_specific.group(3).strip()
|
| 1687 |
+
|
| 1688 |
+
if specified_bucket in valid_buckets:
|
| 1689 |
+
if content_to_save:
|
| 1690 |
+
assistant_response = handle_save_trustbuilder(content_to_save, specified_bucket)
|
| 1691 |
+
else:
|
| 1692 |
+
assistant_response = "No content provided. Please include content after 'save this under [bucket]:'."
|
| 1693 |
+
else:
|
| 1694 |
+
assistant_response = f"Invalid Trust Bucket '{specified_bucket}'. Valid buckets are: {', '.join(valid_buckets)}."
|
| 1695 |
+
|
| 1696 |
+
# Save response to chat history and display it
|
| 1697 |
+
st.session_state.chat_history.append({"role": "assistant", "content": assistant_response})
|
| 1698 |
+
with st.chat_message("assistant"):
|
| 1699 |
+
st.markdown(assistant_response)
|
| 1700 |
+
return None
|
| 1701 |
+
|
| 1702 |
+
# Case 3: Save and allocate: [content] (automatic allocation)
|
| 1703 |
+
match_save_allocate_auto = re.search(r"\bsave\s+(this\s+)?and\s+allocate\s*:\s*(.+)", prompt, re.IGNORECASE)
|
| 1704 |
+
if match_save_allocate_auto:
|
| 1705 |
+
content_to_save = match_save_allocate_auto.group(2).strip()
|
| 1706 |
+
if content_to_save:
|
| 1707 |
+
assistant_response = handle_save_trustbuilder(content_to_save) # Automatically allocate bucket
|
| 1708 |
+
else:
|
| 1709 |
+
assistant_response = "No content provided. Please include content after 'save and allocate:'."
|
| 1710 |
+
|
| 1711 |
+
# Save response to chat history and display it
|
| 1712 |
+
st.session_state.chat_history.append({"role": "assistant", "content": assistant_response})
|
| 1713 |
+
with st.chat_message("assistant"):
|
| 1714 |
+
st.markdown(assistant_response)
|
| 1715 |
+
return
|
| 1716 |
+
|
| 1717 |
# Show saved TrustBuilders
|
| 1718 |
elif "find my saved trustbuilders" in prompt or "show my saved trustbuilders" in prompt:
|
| 1719 |
trustbuilders = fetch_trustbuilders(st.session_state.get("wix_user_id", "default_user"))
|
|
|
|
| 1826 |
|
| 1827 |
|
| 1828 |
|
| 1829 |
+
|
| 1830 |
+
def load_user_memory(user_id):
    """
    Load saved TrustBuilders and uploaded documents from Firebase into session state.

    Side effects: populates st.session_state["trustbuilders"],
    ["documents"], and ["vector_store"]; on any failure all three are reset
    to empty values and the error is shown via st.error.
    """
    try:
        # Load TrustBuilders
        trustbuilders = db.child("users").child(user_id).child("TrustBuilders").get().val()
        # NOTE(review): the empty default here is a list, while "documents"
        # defaults to a dict — confirm consumers of "trustbuilders" expect a
        # list and not a mapping.
        st.session_state["trustbuilders"] = trustbuilders if trustbuilders else []

        # Load Uploaded Documents from 'KnowledgeBase'
        documents = db.child("users").child(user_id).child("KnowledgeBase").get().val()
        st.session_state["documents"] = documents if documents else {}

        # Reconstruct vector stores for each document
        st.session_state["vector_store"] = {}
        for doc_id, doc_data in st.session_state["documents"].items():
            content = doc_data.get("content", "")
            if content:
                index_document_content(content, doc_id)

    except Exception as e:
        # Reset everything so downstream code sees a consistent empty state.
        st.error(f"Error loading user memory: {e}")
        st.session_state["trustbuilders"] = []
        st.session_state["documents"] = {}
        st.session_state["vector_store"] = {}
|
| 1855 |
+
|
| 1856 |
+
def get_document_content(doc_name=None):
    """
    Look up the content of an uploaded document held in session state.

    Args:
        doc_name: Optional document name, matched case-insensitively. When
            omitted, the most recently uploaded document is used.

    Returns:
        tuple: (content, error) — exactly one of the two is None.
    """
    documents = st.session_state.get("documents", {})
    if not documents:
        return None, "No documents have been uploaded."

    # If a specific document name is provided, find it by name.
    if doc_name:
        # Iterate values only — the keys were fetched and discarded before.
        for doc_data in documents.values():
            if doc_data.get("name", "").lower() == doc_name.lower():
                content = doc_data.get("content")
                if content:
                    return content, None
                return None, f"Document '{doc_name}' does not contain any content."
        return None, f"Document '{doc_name}' not found."

    # Default to the most recent document
    last_doc = list(documents.values())[-1]
    content = last_doc.get("content")
    if content:
        return content, None
    return None, "The most recently uploaded document does not contain any content."
|
| 1879 |
+
|
| 1880 |
+
def handle_document_query(query):
    """
    Handle queries related to uploaded documents for response generation.

    Returns the LLM's answer grounded in the document content, or a
    human-readable error string when the document cannot be resolved or the
    LLM call fails.
    """
    # Extract specific document name if mentioned, e.g. "document 'foo.docx'"
    doc_name_match = re.search(r"document\s+'([^']+)'", query, re.IGNORECASE)
    doc_name = doc_name_match.group(1) if doc_name_match else None

    # Fetch document content (falls back to the most recent upload)
    doc_content, error = get_document_content(doc_name)
    if error:
        return error

    # Generate AI response with document context
    full_prompt = f"Document Content:\n{doc_content}\n\nUser Query: {query}\n\nResponse:"
    try:
        # NOTE(review): "gpt-4" here while other call sites use "gpt-4o" —
        # confirm whether this is intentional or a stale model name.
        llm = ChatOpenAI(model="gpt-4", temperature=0.5, api_key=openai_api_key)
        response = llm.invoke(full_prompt)
        return response.content
    except Exception as e:
        return f"Error generating response using the document: {e}"
|
| 1901 |
+
|
| 1902 |
+
|
| 1903 |
+
|
| 1904 |
+
|
| 1905 |
+
|
| 1906 |
+
# Seed one-shot conversation flags in session state before first use.
for _flag, _initial in (
    ("missing_trustbucket_content", None),
    ("handled", False),
):
    if _flag not in st.session_state:
        st.session_state[_flag] = _initial
|
| 1911 |
+
|
| 1912 |
|
|
|
|
| 1913 |
|
| 1914 |
if "email" not in st.session_state:
|
| 1915 |
st.session_state["email"] = f"demo_user_{st.session_state['wix_user_id']}@example.com"
|