Update app.py
app.py (CHANGED)
@@ -16,7 +16,7 @@ from langchain.document_loaders import PyPDFLoader
 st.set_page_config(page_title="Enterprise document search + chat", layout="wide")

 # Streamlit app header
-st.title("Enterprise document
+st.title("Enterprise document helpdesk")

 # Sidebar for API Key input
 with st.sidebar:
@@ -33,49 +33,48 @@ if "OPENAI_API_KEY" in os.environ:
 dotenv.load_dotenv()
 chat = ChatOpenAI(model="gpt-3.5-turbo-1106", temperature=0.2)

+# #loader1 = WebBaseLoader("https://www.tbankltd.com/")
 # loader1 = PyPDFLoader("Tbank resources.pdf")
 # loader2 = PyPDFLoader("International Banking Services.pdf")
 # data1 = loader1.load()
 # data2 = loader2.load()
 # data = data1 + data2
 st.header('Multiple File Upload')
-uploaded_files = st.file_uploader('Upload your files',accept_multiple_files=True, type=['txt', 'pdf','csv','ppt','doc','xls','pptx','xlsx'])
+uploaded_files = st.file_uploader('Upload your files',accept_multiple_files=True, type=['txt', 'pdf', 'csv', 'ppt', 'doc', 'xls', 'pptx', 'xlsx'])
 if uploaded_files:
 all_documents = []
 for file in uploaded_files:
 documents = load_document(file)
 all_documents.extend(documents)
 text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
-all_splits = text_splitter.split_documents(
+all_splits = text_splitter.split_documents(data)
 embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")
 vectorstore = Chroma.from_documents(documents=all_splits, embedding=embeddings)
 retriever = vectorstore.as_retriever(k=4)

 SYSTEM_TEMPLATE = """
-You are
-1.
-Your primary goal is to assist users with information directly related to Tbank, using only the website content and provided PDF documents. Avoid speculation and stick strictly to the provided information.
+You are an advanced AI assistant designed for document search and chatbot functionality. Your primary functions are:
+
+1. Process and structure multiple documents in various formats, including:
+.txt, .pdf, .csv, .ppt, .doc, .xls, .pptx, and .xlsx
+
+2. Extract and organize information from these unstructured documents into a coherent, searchable format.
+
+3. Retrieve relevant information from the processed documents based on user queries.
+
+4. Act as a chatbot, engaging in conversations about the content of the documents.
+
+5. Provide accurate and contextual responses to user questions, drawing solely from the information contained within the processed documents.
+
+6. If a user's question is not related to the content of the provided documents, politely inform them that you can only answer questions based on the information in the given documents.
+
+7. When answering, cite the specific document or section where the information was found, if possible.
+
+8. If there's ambiguity in a query, ask for clarification to ensure you provide the most relevant information.
+
+9. Maintain confidentiality and do not share or discuss information from one user's documents with other users.
+
+Remember, your knowledge is limited to the content of the documents you've been given to process. Do not provide information or answer questions that are outside the scope of these documents. Always strive for accuracy and relevance in your responses.

 <context>
 {context}
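Note on the hunk above: the new line 50 passes data to text_splitter.split_documents(), but data is only assigned in the commented-out loader block, while the uploaded files are gathered into all_documents via load_document(), a helper presumably defined elsewhere in app.py and not shown in this diff. A minimal sketch of what the uploaded-file path appears to intend, under that assumption:

    # Sketch only: feed the splitter the documents gathered from the uploads,
    # since `data` exists only in the commented-out loader code above.
    all_splits = text_splitter.split_documents(all_documents)

Relatedly, LangChain vector stores usually take the top-k setting as vectorstore.as_retriever(search_kwargs={"k": 4}); passing k=4 directly, as this code does, may not have the intended effect.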
@@ -99,59 +98,61 @@ if "OPENAI_API_KEY" in os.environ:
 document_chain = create_stuff_documents_chain(chat, question_answering_prompt)

 return retriever, document_chain
+else:
+st.warning("Please Upload File to Continue")

+# Load components
 with st.spinner("Initializing Assistant..."):
 retriever, document_chain = initialize_components()

+# Initialize memory for each session
 if "memory" not in st.session_state:
 st.session_state.memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

-st.subheader("Chat
+# Chat interface
+st.subheader("Chat with Tbank Assistant")

+# Initialize chat history
 if "messages" not in st.session_state:
 st.session_state.messages = []

+# Display chat messages from history on app rerun
 for message in st.session_state.messages:
+with st.chat_message(message["role"]):
 st.markdown(message["content"])

+# React to user input
 if prompt := st.chat_input("What would you like to know about Document?"):
+# Display user message in chat message container
+st.chat_message("user").markdown(prompt)
+# Add user message to chat history
+st.session_state.messages.append({"role": "user", "content": prompt})
+
+with st.chat_message("assistant"):
+message_placeholder = st.empty()
+
+# Retrieve relevant documents
+docs = retriever.get_relevant_documents(prompt)
+
+# Generate response
+response = document_chain.invoke(
+{
+"context": docs,
+"chat_history": st.session_state.memory.load_memory_variables({})["chat_history"],
+"messages": [
+HumanMessage(content=prompt)
+],
+}
+)
+
+# The response is already a string, so we can use it directly
 full_response = response
 message_placeholder.markdown(full_response)
+
+# Add assistant response to chat history
 st.session_state.messages.append({"role": "assistant", "content": full_response})
+
+# Update memory
 st.session_state.memory.save_context({"input": prompt}, {"output": full_response})

 else:
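For the document_chain.invoke({...}) call added above to work, the question_answering_prompt wrapped by create_stuff_documents_chain() (defined outside this hunk) has to expose a {context} variable for the stuffed documents plus placeholders for chat_history and messages. A minimal sketch of a prompt with that shape, assuming standard LangChain imports; the variable names mirror the diff, but the actual definition is not shown in this commit:

    from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

    # Sketch only: shows the input variables the invoke() call above expects
    # the prompt to declare; the real prompt lives outside this hunk.
    question_answering_prompt = ChatPromptTemplate.from_messages(
        [
            ("system", SYSTEM_TEMPLATE),                        # contains {context}
            MessagesPlaceholder(variable_name="chat_history"),  # filled from ConversationBufferMemory
            MessagesPlaceholder(variable_name="messages"),      # the current HumanMessage
        ]
    )

Because create_stuff_documents_chain() pipes the model output through a string output parser by default, the response is already a plain string, which is why the diff assigns it straight to full_response.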