AjiNiktech committed on
Commit
edb80a3
·
verified ·
1 Parent(s): ad9a31a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +123 -121
app.py CHANGED
@@ -1,16 +1,15 @@
1
  import streamlit as st
2
- from langchain_openai import ChatOpenAI
3
  import os
4
  import dotenv
5
- from langchain_community.document_loaders import WebBaseLoader
6
  from langchain_text_splitters import RecursiveCharacterTextSplitter
7
  from langchain_chroma import Chroma
8
- from langchain_openai import OpenAIEmbeddings
9
  from langchain.chains.combine_documents import create_stuff_documents_chain
10
  from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
11
  from langchain_core.messages import HumanMessage, AIMessage
12
  from langchain.memory import ConversationBufferMemory
13
- from langchain.document_loaders import PyPDFLoader
14
 
15
  # Set page config
16
  st.set_page_config(page_title="Enterprise document search + chat", layout="wide")
@@ -27,153 +26,156 @@ with st.sidebar:
27
 
28
  # Main app logic
29
  if "OPENAI_API_KEY" in os.environ:
30
- # Initialize components
31
- @st.cache_resource
32
- def initialize_components():
33
- dotenv.load_dotenv()
34
- chat = ChatOpenAI(model="gpt-3.5-turbo-1106", temperature=0.2)
35
-
36
- # #loader1 = WebBaseLoader("https://www.tbankltd.com/")
37
- # loader1 = PyPDFLoader("Tbank resources.pdf")
38
- # loader2 = PyPDFLoader("International Banking Services.pdf")
39
- # data1 = loader1.load()
40
- # data2 = loader2.load()
41
- # data = data1 + data2
42
- st.header('Multiple File Upload')
43
- uploaded_files = st.file_uploader('Upload your files',accept_multiple_files=True, type=['txt', 'pdf', 'csv', 'ppt', 'doc', 'xls', 'pptx', 'xlsx'])
44
- def load_file(file):
45
- file_extension = os.path.splitext(file.name)[1].lower()
46
- with tempfile.NamedTemporaryFile(delete=False, suffix=file_extension) as temp_file:
47
- temp_file.write(file.getvalue())
48
- temp_file_path = temp_file.name
49
-
50
- if file_extension == '.txt':
51
- loader = TextLoader(temp_file_path)
52
- elif file_extension == '.pdf':
53
- loader = PyPDFLoader(temp_file_path)
54
- elif file_extension == '.csv':
55
- loader = CSVLoader(temp_file_path)
56
- elif file_extension in ['.ppt', '.pptx']:
57
- loader = UnstructuredPowerPointLoader(temp_file_path)
58
- elif file_extension in ['.doc', '.docx']:
59
- loader = UnstructuredWordDocumentLoader(temp_file_path)
60
- elif file_extension in ['.xls', '.xlsx']:
61
- loader = UnstructuredExcelLoader(temp_file_path)
62
- else:
63
- os.unlink(temp_file_path)
64
- raise ValueError(f"Unsupported file type: {file_extension}")
65
-
66
- documents = loader.load()
67
  os.unlink(temp_file_path)
68
- return documents
 
 
 
 
 
 
 
 
 
 
 
69
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
70
- all_splits = text_splitter.split_documents(documents)
71
- embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")
72
- vectorstore = Chroma.from_documents(documents=all_splits, embedding=embeddings)
73
- retriever = vectorstore.as_retriever(k=4)
74
 
75
- SYSTEM_TEMPLATE = """
76
- You are an advanced AI assistant designed for document search and chatbot functionality. Your primary functions are:
 
 
 
 
 
 
77
 
78
- 1. Process and structure multiple documents in various formats, including:
79
- .txt, .pdf, .csv, .ppt, .doc, .xls, .pptx, and .xlsx
80
 
81
- 2. Extract and organize information from these unstructured documents into a coherent, searchable format.
 
82
 
83
- 3. Retrieve relevant information from the processed documents based on user queries.
84
 
85
- 4. Act as a chatbot, engaging in conversations about the content of the documents.
86
 
87
- 5. Provide accurate and contextual responses to user questions, drawing solely from the information contained within the processed documents.
88
 
89
- 6. If a user's question is not related to the content of the provided documents, politely inform them that you can only answer questions based on the information in the given documents.
90
 
91
- 7. When answering, cite the specific document or section where the information was found, if possible.
92
 
93
- 8. If there's ambiguity in a query, ask for clarification to ensure you provide the most relevant information.
94
 
95
- 9. Maintain confidentiality and do not share or discuss information from one user's documents with other users.
96
 
97
- Remember, your knowledge is limited to the content of the documents you've been given to process. Do not provide information or answer questions that are outside the scope of these documents. Always strive for accuracy and relevance in your responses.
98
 
99
- <context>
100
- {context}
101
- </context>
102
 
103
- Chat History:
104
- {chat_history}
105
- """
106
 
107
- question_answering_prompt = ChatPromptTemplate.from_messages(
108
- [
109
- (
110
- "system",
111
- SYSTEM_TEMPLATE,
112
- ),
113
- MessagesPlaceholder(variable_name="chat_history"),
114
- MessagesPlaceholder(variable_name="messages"),
115
- ]
116
- )
117
 
118
- document_chain = create_stuff_documents_chain(chat, question_answering_prompt)
 
 
 
 
 
 
 
 
 
119
 
120
- return retriever, document_chain
121
- # else:
122
- # st.warning("Please Upload File to Continue")
123
 
124
- # Load components
125
- with st.spinner("Initializing Assistant..."):
126
- retriever, document_chain = initialize_components()
127
 
128
- # Initialize memory for each session
129
- if "memory" not in st.session_state:
130
- st.session_state.memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
131
 
132
- # Chat interface
133
- st.subheader("Chat with Assistant")
 
134
 
135
- # Initialize chat history
136
- if "messages" not in st.session_state:
137
- st.session_state.messages = []
138
 
139
- # Display chat messages from history on app rerun
140
- for message in st.session_state.messages:
141
- with st.chat_message(message["role"]):
142
- st.markdown(message["content"])
143
 
144
- # React to user input
145
- if prompt := st.chat_input("What would you like to know about Document?"):
146
- # Display user message in chat message container
147
- st.chat_message("user").markdown(prompt)
148
- # Add user message to chat history
149
- st.session_state.messages.append({"role": "user", "content": prompt})
150
 
151
- with st.chat_message("assistant"):
152
- message_placeholder = st.empty()
 
 
 
 
153
 
154
- # Retrieve relevant documents
155
- docs = retriever.get_relevant_documents(prompt)
156
 
157
- # Generate response
158
- response = document_chain.invoke(
159
- {
160
- "context": docs,
161
- "chat_history": st.session_state.memory.load_memory_variables({})["chat_history"],
162
- "messages": [
163
- HumanMessage(content=prompt)
164
- ],
165
- }
166
- )
 
 
 
 
 
 
 
167
 
168
- # The response is already a string, so we can use it directly
169
- full_response = response
170
- message_placeholder.markdown(full_response)
171
 
172
- # Add assistant response to chat history
173
- st.session_state.messages.append({"role": "assistant", "content": full_response})
174
 
175
- # Update memory
176
- st.session_state.memory.save_context({"input": prompt}, {"output": full_response})
177
 
178
  else:
179
  st.warning("Please enter your OpenAI API Key in the sidebar to start the chatbot.")
 
1
  import streamlit as st
2
+ from langchain_openai import ChatOpenAI, OpenAIEmbeddings
3
  import os
4
  import dotenv
5
+ from langchain_community.document_loaders import TextLoader, PyPDFLoader, CSVLoader, UnstructuredPowerPointLoader, UnstructuredWordDocumentLoader, UnstructuredExcelLoader
6
  from langchain_text_splitters import RecursiveCharacterTextSplitter
7
  from langchain_chroma import Chroma
 
8
  from langchain.chains.combine_documents import create_stuff_documents_chain
9
  from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
10
  from langchain_core.messages import HumanMessage, AIMessage
11
  from langchain.memory import ConversationBufferMemory
12
+ import tempfile
13
 
14
  # Set page config
15
  st.set_page_config(page_title="Enterprise document search + chat", layout="wide")
 
26
 
27
  # Main app logic
28
  if "OPENAI_API_KEY" in os.environ:
29
+ st.header('Multiple File Upload')
30
+ uploaded_files = st.file_uploader('Upload your files', accept_multiple_files=True, type=['txt', 'pdf', 'csv', 'ppt', 'doc', 'xls', 'pptx', 'xlsx'])
31
+
32
def load_file(file):
    """Load one uploaded file into a list of documents.

    The upload is spooled to a temporary file on disk because the loader
    classes used here take a filesystem path, not an in-memory buffer.

    Args:
        file: Upload object exposing ``.name`` (original file name) and
            ``.getvalue()`` (raw bytes) -- presumably a Streamlit
            ``UploadedFile``; confirm against the ``st.file_uploader`` call.

    Returns:
        Whatever ``loader.load()`` returns for the selected loader.

    Raises:
        ValueError: If the file extension is not one of the supported types.
    """
    file_extension = os.path.splitext(file.name)[1].lower()

    # Reject unknown types before touching the disk, so an unsupported upload
    # never creates a temporary file in the first place.
    # NOTE(review): '.docx' is handled here; confirm the uploader's accepted
    # `type` list also includes 'docx', otherwise this branch is unreachable.
    supported_extensions = {
        '.txt', '.pdf', '.csv',
        '.ppt', '.pptx',
        '.doc', '.docx',
        '.xls', '.xlsx',
    }
    if file_extension not in supported_extensions:
        raise ValueError(f"Unsupported file type: {file_extension}")

    # delete=False: the file must outlive the `with` block (and be closed)
    # so the loader can reopen it by path; we remove it ourselves below.
    temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=file_extension)
    temp_file_path = temp_file.name
    try:
        with temp_file:
            temp_file.write(file.getvalue())

        if file_extension == '.txt':
            loader = TextLoader(temp_file_path)
        elif file_extension == '.pdf':
            loader = PyPDFLoader(temp_file_path)
        elif file_extension == '.csv':
            loader = CSVLoader(temp_file_path)
        elif file_extension in ['.ppt', '.pptx']:
            loader = UnstructuredPowerPointLoader(temp_file_path)
        elif file_extension in ['.doc', '.docx']:
            loader = UnstructuredWordDocumentLoader(temp_file_path)
        else:  # '.xls' / '.xlsx' -- the only remaining supported extensions
            loader = UnstructuredExcelLoader(temp_file_path)

        return loader.load()
    finally:
        # Always remove the temp file, including when writing or parsing
        # fails (e.g. a corrupt PDF); previously a failed load leaked it.
        os.unlink(temp_file_path)
57
+
58
+ # Process uploaded files
59
+ if uploaded_files:
60
+ all_documents = []
61
+ for file in uploaded_files:
62
+ all_documents.extend(load_file(file))
63
+
64
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
65
+ all_splits = text_splitter.split_documents(all_documents)
 
 
 
66
 
67
+ # Initialize components
68
+ @st.cache_resource
69
+ def initialize_components(_all_splits):
70
+ dotenv.load_dotenv()
71
+ chat = ChatOpenAI(model="gpt-3.5-turbo-1106", temperature=0.2)
72
+ embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")
73
+ vectorstore = Chroma.from_documents(documents=_all_splits, embedding=embeddings)
74
+ retriever = vectorstore.as_retriever(k=4)
75
 
76
+ SYSTEM_TEMPLATE = """
77
+ You are an advanced AI assistant designed for document search and chatbot functionality. Your primary functions are:
78
 
79
+ 1. Process and structure multiple documents in various formats, including:
80
+ .txt, .pdf, .csv, .ppt, .doc, .xls, .pptx, and .xlsx
81
 
82
+ 2. Extract and organize information from these unstructured documents into a coherent, searchable format.
83
 
84
+ 3. Retrieve relevant information from the processed documents based on user queries.
85
 
86
+ 4. Act as a chatbot, engaging in conversations about the content of the documents.
87
 
88
+ 5. Provide accurate and contextual responses to user questions, drawing solely from the information contained within the processed documents.
89
 
90
+ 6. If a user's question is not related to the content of the provided documents, politely inform them that you can only answer questions based on the information in the given documents.
91
 
92
+ 7. When answering, cite the specific document or section where the information was found, if possible.
93
 
94
+ 8. If there's ambiguity in a query, ask for clarification to ensure you provide the most relevant information.
95
 
96
+ 9. Maintain confidentiality and do not share or discuss information from one user's documents with other users.
97
 
98
+ Remember, your knowledge is limited to the content of the documents you've been given to process. Do not provide information or answer questions that are outside the scope of these documents. Always strive for accuracy and relevance in your responses.
 
 
99
 
100
+ <context>
101
+ {context}
102
+ </context>
103
 
104
+ Chat History:
105
+ {chat_history}
106
+ """
 
 
 
 
 
 
 
107
 
108
+ question_answering_prompt = ChatPromptTemplate.from_messages(
109
+ [
110
+ (
111
+ "system",
112
+ SYSTEM_TEMPLATE,
113
+ ),
114
+ MessagesPlaceholder(variable_name="chat_history"),
115
+ MessagesPlaceholder(variable_name="messages"),
116
+ ]
117
+ )
118
 
119
+ document_chain = create_stuff_documents_chain(chat, question_answering_prompt)
 
 
120
 
121
+ return retriever, document_chain
 
 
122
 
123
+ # Load components
124
+ with st.spinner("Initializing Assistant..."):
125
+ retriever, document_chain = initialize_components(all_splits)
126
 
127
+ # Initialize memory for each session
128
+ if "memory" not in st.session_state:
129
+ st.session_state.memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
130
 
131
+ # Chat interface
132
+ st.subheader("Chat with Assistant")
 
133
 
134
+ # Initialize chat history
135
+ if "messages" not in st.session_state:
136
+ st.session_state.messages = []
 
137
 
138
+ # Display chat messages from history on app rerun
139
+ for message in st.session_state.messages:
140
+ with st.chat_message(message["role"]):
141
+ st.markdown(message["content"])
 
 
142
 
143
+ # React to user input
144
+ if prompt := st.chat_input("What would you like to know about Document?"):
145
+ # Display user message in chat message container
146
+ st.chat_message("user").markdown(prompt)
147
+ # Add user message to chat history
148
+ st.session_state.messages.append({"role": "user", "content": prompt})
149
 
150
+ with st.chat_message("assistant"):
151
+ message_placeholder = st.empty()
152
 
153
+ # Retrieve relevant documents
154
+ docs = retriever.get_relevant_documents(prompt)
155
+
156
+ # Generate response
157
+ response = document_chain.invoke(
158
+ {
159
+ "context": docs,
160
+ "chat_history": st.session_state.memory.load_memory_variables({})["chat_history"],
161
+ "messages": [
162
+ HumanMessage(content=prompt)
163
+ ],
164
+ }
165
+ )
166
+
167
+ # The response is already a string, so we can use it directly
168
+ full_response = response
169
+ message_placeholder.markdown(full_response)
170
 
171
+ # Add assistant response to chat history
172
+ st.session_state.messages.append({"role": "assistant", "content": full_response})
 
173
 
174
+ # Update memory
175
+ st.session_state.memory.save_context({"input": prompt}, {"output": full_response})
176
 
177
+ else:
178
+ st.warning("Please upload files to continue.")
179
 
180
  else:
181
  st.warning("Please enter your OpenAI API Key in the sidebar to start the chatbot.")