1MR committed on
Commit
22a9f10
·
verified ·
1 Parent(s): b882d51

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -209
app.py CHANGED
@@ -11,6 +11,7 @@ import os
11
  from langchain_community.document_loaders import PyPDFLoader, TextLoader, JSONLoader, CSVLoader
12
  from htmlTemplates import css, bot_template, user_template
13
 
 
14
  def get_pdf_text(pdf_docs):
15
  temp_dir = tempfile.TemporaryDirectory()
16
  temp_filepath = os.path.join(temp_dir.name, pdf_docs.name)
@@ -20,6 +21,7 @@ def get_pdf_text(pdf_docs):
20
  pdf_doc = pdf_loader.load()
21
  return pdf_doc
22
 
 
23
  def get_text_file(text_docs):
24
  temp_dir = tempfile.TemporaryDirectory()
25
  temp_filepath = os.path.join(temp_dir.name, text_docs.name)
@@ -29,6 +31,7 @@ def get_text_file(text_docs):
29
  text_doc = text_loader.load()
30
  return text_doc
31
 
 
32
  def get_csv_file(csv_docs):
33
  temp_dir = tempfile.TemporaryDirectory()
34
  temp_filepath = os.path.join(temp_dir.name, csv_docs.name)
@@ -38,6 +41,7 @@ def get_csv_file(csv_docs):
38
  csv_doc = csv_loader.load()
39
  return csv_doc
40
 
 
41
  def get_json_file(json_docs):
42
  temp_dir = tempfile.TemporaryDirectory()
43
  temp_filepath = os.path.join(temp_dir.name, json_docs.name)
@@ -47,20 +51,26 @@ def get_json_file(json_docs):
47
  json_doc = json_loader.load()
48
  return json_doc
49
 
 
50
  def get_text_chunks(documents):
51
  text_splitter = RecursiveCharacterTextSplitter(
52
  chunk_size=300,
53
  chunk_overlap=100,
54
  length_function=len
55
  )
 
56
  documents = text_splitter.split_documents(documents)
57
  return documents
58
 
 
59
  def get_vectorstore(text_chunks):
60
  embeddings = HuggingFaceEmbeddings(model_name="WhereIsAI/UAE-Large-V1")
61
  vectorstore = FAISS.from_documents(text_chunks, embeddings)
62
  return vectorstore
63
-
 
 
 
64
  def get_conversation_chain(vectorstore, tokenH):
65
  if not tokenH:
66
  raise ValueError("API token is required to initialize the HuggingFaceHub model")
@@ -81,6 +91,8 @@ def get_conversation_chain(vectorstore, tokenH):
81
  except Exception as e:
82
  raise ValueError(f"Error generating response: {str(e)}")
83
 
 
 
84
  def conversation_chain(user_input):
85
  retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 5})
86
  documents = retriever.get_relevant_documents(user_input)
@@ -90,23 +102,42 @@ def get_conversation_chain(vectorstore, tokenH):
90
 
91
  return conversation_chain
92
 
 
93
  def handle_userinput(user_question):
 
94
  if "chat_history" not in st.session_state:
95
  st.session_state.chat_history = []
96
 
 
97
  response = st.session_state.conversation(user_question)
 
 
98
  st.session_state.chat_history.append({"role": "user", "content": user_question})
99
  st.session_state.chat_history.append({"role": "assistant", "content": response})
100
 
101
- def display_chat_history():
102
- if "chat_history" in st.session_state and st.session_state.chat_history:
103
- for message in st.session_state.chat_history:
104
- if message["role"] == "user":
105
- st.write(f"User: {message['content']}")
106
- else:
107
- st.write(f"Bot: {message['content']}")
108
- else:
109
- st.write("No chat history to display.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
 
111
  def main():
112
  st.set_page_config(page_title="Chat with multiple Files", page_icon=":books:")
@@ -117,11 +148,13 @@ def main():
117
  st.warning("Please enter a valid HuggingFace API token.")
118
  return
119
 
 
120
  if "conversation" not in st.session_state:
121
  st.session_state.conversation = None
122
  if "chat_history" not in st.session_state:
123
  st.session_state.chat_history = []
124
 
 
125
  user_question = st.text_input("Ask a question about your documents:")
126
  if user_question:
127
  if st.session_state.conversation:
@@ -129,9 +162,7 @@ def main():
129
  else:
130
  st.warning("Please upload and process files first!")
131
 
132
- if st.button("Display Chat History"):
133
- display_chat_history()
134
-
135
  docs = st.file_uploader("Upload your PDFs here and click on 'Process'", accept_multiple_files=True)
136
  if st.button("Process"):
137
  with st.spinner("Processing"):
@@ -147,209 +178,19 @@ def main():
147
  elif file.type == 'application/json':
148
  doc_list.extend(get_json_file(file))
149
 
 
150
  text_chunks = get_text_chunks(doc_list)
 
 
151
  vectorstore = get_vectorstore(text_chunks)
 
 
152
  st.session_state.conversation = get_conversation_chain(vectorstore, tokenH)
153
  st.success("Documents processed successfully!")
154
  else:
155
  st.warning("Please upload at least one document to process.")
156
 
 
157
  if __name__ == '__main__':
158
  main()
159
 
160
- # import streamlit as st
161
- # from langchain.text_splitter import RecursiveCharacterTextSplitter
162
- # from langchain_community.embeddings import HuggingFaceEmbeddings
163
- # from langchain_community.vectorstores import FAISS
164
- # from langchain.chat_models import ChatOpenAI
165
- # from langchain.memory import ConversationBufferMemory
166
- # from langchain.chains import ConversationalRetrievalChain
167
- # from huggingface_hub import InferenceClient
168
- # import tempfile
169
- # import os
170
- # from langchain_community.document_loaders import PyPDFLoader, TextLoader, JSONLoader, CSVLoader
171
- # from htmlTemplates import css, bot_template, user_template
172
-
173
-
174
- # def get_pdf_text(pdf_docs):
175
- # temp_dir = tempfile.TemporaryDirectory()
176
- # temp_filepath = os.path.join(temp_dir.name, pdf_docs.name)
177
- # with open(temp_filepath, "wb") as f:
178
- # f.write(pdf_docs.getvalue())
179
- # pdf_loader = PyPDFLoader(temp_filepath)
180
- # pdf_doc = pdf_loader.load()
181
- # return pdf_doc
182
-
183
-
184
- # def get_text_file(text_docs):
185
- # temp_dir = tempfile.TemporaryDirectory()
186
- # temp_filepath = os.path.join(temp_dir.name, text_docs.name)
187
- # with open(temp_filepath, "wb") as f:
188
- # f.write(text_docs.getvalue())
189
- # text_loader = TextLoader(temp_filepath)
190
- # text_doc = text_loader.load()
191
- # return text_doc
192
-
193
-
194
- # def get_csv_file(csv_docs):
195
- # temp_dir = tempfile.TemporaryDirectory()
196
- # temp_filepath = os.path.join(temp_dir.name, csv_docs.name)
197
- # with open(temp_filepath, "wb") as f:
198
- # f.write(csv_docs.getvalue())
199
- # csv_loader = CSVLoader(temp_filepath)
200
- # csv_doc = csv_loader.load()
201
- # return csv_doc
202
-
203
-
204
- # def get_json_file(json_docs):
205
- # temp_dir = tempfile.TemporaryDirectory()
206
- # temp_filepath = os.path.join(temp_dir.name, json_docs.name)
207
- # with open(temp_filepath, "wb") as f:
208
- # f.write(json_docs.getvalue())
209
- # json_loader = JSONLoader(temp_filepath)
210
- # json_doc = json_loader.load()
211
- # return json_doc
212
-
213
-
214
- # def get_text_chunks(documents):
215
- # text_splitter = RecursiveCharacterTextSplitter(
216
- # chunk_size=300,
217
- # chunk_overlap=100,
218
- # length_function=len
219
- # )
220
-
221
- # documents = text_splitter.split_documents(documents)
222
- # return documents
223
-
224
-
225
- # def get_vectorstore(text_chunks):
226
- # embeddings = HuggingFaceEmbeddings(model_name="WhereIsAI/UAE-Large-V1")
227
- # vectorstore = FAISS.from_documents(text_chunks, embeddings)
228
- # return vectorstore
229
- # #sentence-transformers/all-MiniLM-L6-v2
230
- # #HuggingFaceH4/zephyr-7b-alpha
231
- # #Qwen/Qwen2.5-72B-Instruct
232
- # #mistralai/Mistral-7B-Instruct-v0.2
233
- # def get_conversation_chain(vectorstore, tokenH):
234
- # if not tokenH:
235
- # raise ValueError("API token is required to initialize the HuggingFaceHub model")
236
-
237
- # try:
238
- # client = InferenceClient(api_key=tokenH)
239
- # except Exception as e:
240
- # raise ValueError(f"Error initializing HuggingFace InferenceClient: {str(e)}")
241
-
242
- # def generate_response(messages):
243
- # try:
244
- # completion = client.chat.completions.create(
245
- # model="Qwen/Qwen2.5-72B-Instruct",
246
- # messages=messages,
247
- # max_tokens=500
248
- # )
249
- # return completion.choices[0].message['content']
250
- # except Exception as e:
251
- # raise ValueError(f"Error generating response: {str(e)}")
252
-
253
- # # messages = [{"role": "user", "content": user_input}, {"role": "system", "content": documents_text}]
254
-
255
- # def conversation_chain(user_input):
256
- # retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 5})
257
- # documents = retriever.get_relevant_documents(user_input)
258
- # documents_text = "\n".join(doc.page_content for doc in documents)
259
- # messages = [{"role": "user", "content": user_input}, {"role": "system", "content": documents_text}]
260
- # return generate_response(messages)
261
-
262
- # return conversation_chain
263
-
264
-
265
- # def handle_userinput(user_question):
266
- # # Ensure chat_history is initialized
267
- # if "chat_history" not in st.session_state:
268
- # st.session_state.chat_history = []
269
-
270
- # # Get the response from the conversation
271
- # response = st.session_state.conversation(user_question)
272
-
273
- # # Append the user's question and the assistant's response to chat history
274
- # st.session_state.chat_history.append({"role": "user", "content": user_question})
275
- # st.session_state.chat_history.append({"role": "assistant", "content": response})
276
-
277
- # # Display the chat history
278
- # for message in st.session_state.chat_history:
279
- # if message["role"] == "user":
280
- # st.write(f"<div style='color: gray;'>User: {message['content']}</div>", unsafe_allow_html=True)
281
- # else:
282
- # st.write(f"<div style='color: black;'>Bot: {message['content']}</div>", unsafe_allow_html=True)
283
-
284
- # # for i, message in enumerate(st.session_state.chat_history):
285
- # # if i % 2 == 0:
286
- # # # Display user messages
287
- # # st.write(user_template.replace("{{MSG}}", message["content"]), unsafe_allow_html=True)
288
- # # else:
289
- # # # Display assistant messages
290
- # # st.write(bot_template.replace("{{MSG}}", message["content"]), unsafe_allow_html=True)
291
-
292
- # # for i, message in enumerate(st.session_state.chat_history):
293
- # # if i % 2 == 0:
294
- # # st.write(user_template.replace("{{MSG}}", message.content), unsafe_allow_html=True
295
- # # # st.write(f"<div style='color: gray;'>User: {message['content']}</div>", unsafe_allow_html=True)
296
- # # else:
297
- # # st.write(bot_template.replace("{{MSG}}", message.content), unsafe_allow_html=True
298
- # # # st.write(f"<div style='color: black;'>Bot: {message['content']}</div>", unsafe_allow_html=True)
299
-
300
-
301
- # def main():
302
- # st.set_page_config(page_title="Chat with multiple Files", page_icon=":books:")
303
- # st.header("Chat with Multiple Files")
304
- # tokenH = st.text_input("Paste your HuggingFace API Token (sk-...)")
305
-
306
- # if not tokenH:
307
- # st.warning("Please enter a valid HuggingFace API token.")
308
- # return
309
-
310
- # # Initialize session state variables
311
- # if "conversation" not in st.session_state:
312
- # st.session_state.conversation = None
313
- # if "chat_history" not in st.session_state:
314
- # st.session_state.chat_history = []
315
-
316
- # # User input for questions
317
- # user_question = st.text_input("Ask a question about your documents:")
318
- # if user_question:
319
- # if st.session_state.conversation:
320
- # handle_userinput(user_question)
321
- # else:
322
- # st.warning("Please upload and process files first!")
323
-
324
- # # File uploader and processing
325
- # docs = st.file_uploader("Upload your PDFs here and click on 'Process'", accept_multiple_files=True)
326
- # if st.button("Process"):
327
- # with st.spinner("Processing"):
328
- # if docs:
329
- # doc_list = []
330
- # for file in docs:
331
- # if file.type == 'text/plain':
332
- # doc_list.extend(get_text_file(file))
333
- # elif file.type in ['application/octet-stream', 'application/pdf']:
334
- # doc_list.extend(get_pdf_text(file))
335
- # elif file.type == 'text/csv':
336
- # doc_list.extend(get_csv_file(file))
337
- # elif file.type == 'application/json':
338
- # doc_list.extend(get_json_file(file))
339
-
340
- # # Generate text chunks
341
- # text_chunks = get_text_chunks(doc_list)
342
-
343
- # # Create vector store
344
- # vectorstore = get_vectorstore(text_chunks)
345
-
346
- # # Initialize conversation chain
347
- # st.session_state.conversation = get_conversation_chain(vectorstore, tokenH)
348
- # st.success("Documents processed successfully!")
349
- # else:
350
- # st.warning("Please upload at least one document to process.")
351
-
352
-
353
- # if __name__ == '__main__':
354
- # main()
355
-
 
11
  from langchain_community.document_loaders import PyPDFLoader, TextLoader, JSONLoader, CSVLoader
12
  from htmlTemplates import css, bot_template, user_template
13
 
14
+
15
  def get_pdf_text(pdf_docs):
16
  temp_dir = tempfile.TemporaryDirectory()
17
  temp_filepath = os.path.join(temp_dir.name, pdf_docs.name)
 
21
  pdf_doc = pdf_loader.load()
22
  return pdf_doc
23
 
24
+
25
  def get_text_file(text_docs):
26
  temp_dir = tempfile.TemporaryDirectory()
27
  temp_filepath = os.path.join(temp_dir.name, text_docs.name)
 
31
  text_doc = text_loader.load()
32
  return text_doc
33
 
34
+
35
  def get_csv_file(csv_docs):
36
  temp_dir = tempfile.TemporaryDirectory()
37
  temp_filepath = os.path.join(temp_dir.name, csv_docs.name)
 
41
  csv_doc = csv_loader.load()
42
  return csv_doc
43
 
44
+
45
  def get_json_file(json_docs):
46
  temp_dir = tempfile.TemporaryDirectory()
47
  temp_filepath = os.path.join(temp_dir.name, json_docs.name)
 
51
  json_doc = json_loader.load()
52
  return json_doc
53
 
54
+
55
  def get_text_chunks(documents):
56
  text_splitter = RecursiveCharacterTextSplitter(
57
  chunk_size=300,
58
  chunk_overlap=100,
59
  length_function=len
60
  )
61
+
62
  documents = text_splitter.split_documents(documents)
63
  return documents
64
 
65
+
66
  def get_vectorstore(text_chunks):
67
  embeddings = HuggingFaceEmbeddings(model_name="WhereIsAI/UAE-Large-V1")
68
  vectorstore = FAISS.from_documents(text_chunks, embeddings)
69
  return vectorstore
70
+ #sentence-transformers/all-MiniLM-L6-v2
71
+ #HuggingFaceH4/zephyr-7b-alpha
72
+ #Qwen/Qwen2.5-72B-Instruct
73
+ #mistralai/Mistral-7B-Instruct-v0.2
74
  def get_conversation_chain(vectorstore, tokenH):
75
  if not tokenH:
76
  raise ValueError("API token is required to initialize the HuggingFaceHub model")
 
91
  except Exception as e:
92
  raise ValueError(f"Error generating response: {str(e)}")
93
 
94
+ # messages = [{"role": "user", "content": user_input}, {"role": "system", "content": documents_text}]
95
+
96
  def conversation_chain(user_input):
97
  retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 5})
98
  documents = retriever.get_relevant_documents(user_input)
 
102
 
103
  return conversation_chain
104
 
105
+
106
  def handle_userinput(user_question):
107
+ # Ensure chat_history is initialized
108
  if "chat_history" not in st.session_state:
109
  st.session_state.chat_history = []
110
 
111
+ # Get the response from the conversation
112
  response = st.session_state.conversation(user_question)
113
+
114
+ # Append the user's question and the assistant's response to chat history
115
  st.session_state.chat_history.append({"role": "user", "content": user_question})
116
  st.session_state.chat_history.append({"role": "assistant", "content": response})
117
 
118
+ # Display the chat history
119
+ for message in st.session_state.chat_history:
120
+ if message["role"] == "user":
121
+ st.write(f"<div style='color: gray;'>User: {message['content']}</div>", unsafe_allow_html=True)
122
+ else:
123
+ st.write(f"<div style='color: black;'>Bot: {message['content']}</div>", unsafe_allow_html=True)
124
+
125
+ # for i, message in enumerate(st.session_state.chat_history):
126
+ # if i % 2 == 0:
127
+ # # Display user messages
128
+ # st.write(user_template.replace("{{MSG}}", message["content"]), unsafe_allow_html=True)
129
+ # else:
130
+ # # Display assistant messages
131
+ # st.write(bot_template.replace("{{MSG}}", message["content"]), unsafe_allow_html=True)
132
+
133
+ # for i, message in enumerate(st.session_state.chat_history):
134
+ # if i % 2 == 0:
135
+ # st.write(user_template.replace("{{MSG}}", message.content), unsafe_allow_html=True
136
+ # # st.write(f"<div style='color: gray;'>User: {message['content']}</div>", unsafe_allow_html=True)
137
+ # else:
138
+ # st.write(bot_template.replace("{{MSG}}", message.content), unsafe_allow_html=True
139
+ # # st.write(f"<div style='color: black;'>Bot: {message['content']}</div>", unsafe_allow_html=True)
140
+
141
 
142
  def main():
143
  st.set_page_config(page_title="Chat with multiple Files", page_icon=":books:")
 
148
  st.warning("Please enter a valid HuggingFace API token.")
149
  return
150
 
151
+ # Initialize session state variables
152
  if "conversation" not in st.session_state:
153
  st.session_state.conversation = None
154
  if "chat_history" not in st.session_state:
155
  st.session_state.chat_history = []
156
 
157
+ # User input for questions
158
  user_question = st.text_input("Ask a question about your documents:")
159
  if user_question:
160
  if st.session_state.conversation:
 
162
  else:
163
  st.warning("Please upload and process files first!")
164
 
165
+ # File uploader and processing
 
 
166
  docs = st.file_uploader("Upload your PDFs here and click on 'Process'", accept_multiple_files=True)
167
  if st.button("Process"):
168
  with st.spinner("Processing"):
 
178
  elif file.type == 'application/json':
179
  doc_list.extend(get_json_file(file))
180
 
181
+ # Generate text chunks
182
  text_chunks = get_text_chunks(doc_list)
183
+
184
+ # Create vector store
185
  vectorstore = get_vectorstore(text_chunks)
186
+
187
+ # Initialize conversation chain
188
  st.session_state.conversation = get_conversation_chain(vectorstore, tokenH)
189
  st.success("Documents processed successfully!")
190
  else:
191
  st.warning("Please upload at least one document to process.")
192
 
193
+
194
  if __name__ == '__main__':
195
  main()
196