dadashzadeh committed on
Commit
7e96445
·
verified ·
1 Parent(s): 9ac3c6f

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +372 -0
app.py ADDED
@@ -0,0 +1,372 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import os
4
+ import tempfile
5
+ from langchain_openai import ChatOpenAI, OpenAIEmbeddings
6
+ from langchain_community.vectorstores import FAISS
7
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
8
+ from langchain_core.prompts import PromptTemplate
9
+ from langchain_core.runnables import RunnablePassthrough
10
+ from langchain_core.output_parsers import StrOutputParser
11
+ import requests
12
+ from bs4 import BeautifulSoup
13
+ from langchain.agents import initialize_agent, Tool, AgentType
14
+ from operator import itemgetter
15
+
16
# --- Page Configuration ---
# Sets the browser-tab title and icon, a centered layout, and a sidebar that
# starts expanded.
st.set_page_config(
    page_title="Chatbot Excel",
    page_icon="📊",
    layout="centered",
    initial_sidebar_state="expanded"
)

# --- Custom Styles ---
# Injects a raw <style> block: blue rounded buttons, the gradient-filled
# ".greeting-text" banner used by the chat page, and a light grey sidebar
# background. unsafe_allow_html=True is passed so the markup is emitted as
# HTML instead of being shown as text.
st.markdown(
    """
<style>
.stButton > button {
background-color: #007bff;
color: white;
border: none;
border-radius: 5px;
padding: 0.5em 1em;
font-size: 1em;
font-weight: 600;
}
.greeting-text {
font-size: 2.5em;
color: transparent;
background-image: linear-gradient(90deg, #00529B, #00A9E0);
-webkit-background-clip: text;
font-weight: 600;
text-align: center !important;
}
.sidebar .sidebar-content {
background-color: #f0f2f6;
}
</style>
""",
    unsafe_allow_html=True
)
52
+
53
+ # --- Helper function to format chat history ---
54
def format_chat_history(chat_messages_list):
    """Render all messages except the final one as ``"<Role>: <content>"`` lines.

    The final entry is dropped because the caller passes the history with the
    current user question already appended; that question is supplied to the
    prompt separately.

    Args:
        chat_messages_list: Sequence of dicts with ``"role"`` and ``"content"``
            keys. Any role other than ``"user"`` is labelled "Assistant".

    Returns:
        The rendered lines joined by newlines, or a fixed placeholder sentence
        when there is nothing before the final message.
    """
    earlier_messages = chat_messages_list[:-1]

    def _render(entry):
        # Only the literal role "user" maps to "User"; everything else is the bot.
        if entry["role"] == "user":
            speaker = "User"
        else:
            speaker = "Assistant"
        return f"{speaker}: {entry['content']}"

    if earlier_messages:
        return "\n".join(map(_render, earlier_messages))
    return "No conversation history available."
66
+
67
+ # --- Bing Web Search Function ---
68
def bing_search_tool_function(query: str) -> str:
    """Fetch a Bing results page for *query* and summarise the top hits as text.

    The page is requested with a browser-like User-Agent and parsed with
    BeautifulSoup; every ``<li class="b_algo">`` element is treated as one
    result. The function never raises: failures are reported in the returned
    string so the calling agent can relay them.

    Args:
        query: Free-text search query; it is URL-quoted before being sent.

    Returns:
        Up to five results formatted as ``"Title: ...\\nDescription: ... url:<link>"``
        separated by blank lines, a fixed "no results" sentence when nothing
        usable was parsed, or an error description on network/parsing failure.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }
    quoted_query = requests.utils.quote(query)
    search_url = f"https://www.bing.com/search?q={quoted_query}&qs=HS&pq=se&sc=10-2&cvid=C9D3906F723C49862C937B28F8106C8C&FORM=QBLH&sp=1&lq=0"
    results_list = []
    try:
        response = requests.get(search_url, headers=headers, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, "html.parser")
        for item in soup.find_all("li", attrs={"class": "b_algo"}):
            title_tag = item.find("h2")
            title = title_tag.get_text().strip() if title_tag else "No title"

            link_tag = title_tag.find("a") if title_tag else None
            # .get() rather than ["href"]: one anchor without an href must not
            # raise and discard every other result on the page.
            link = link_tag.get("href", "No link") if link_tag else "No link"

            # Prefer the caption container, fall back to the first paragraph.
            description_tag = item.find(class_="b_caption") or item.find("p")
            if description_tag:
                description_text = description_tag.get_text().strip()
            else:
                description_text = "No description available."

            # Decide whether the entry carries any information BEFORE the URL
            # is appended; checking afterwards made the test always true.
            if title == "No title" and description_text == "No description available.":
                continue

            results_list.append({
                "title": title,
                "description": description_text + " url:" + link,
            })

        if not results_list:
            return "Unfortunately, I couldn't find any results on the web for this search."

        formatted_output = "\n\n".join([
            f"Title: {res['title']}\nDescription: {res['description']}"
            for res in results_list[:5]
        ])
        return formatted_output
    except requests.exceptions.RequestException as e:
        return f"Network error in web search: {e}"
    except Exception as e:
        # Best-effort tool: report parsing problems as text instead of raising.
        return f"Error parsing search results: {e}"
106
+
107
+ # --- Main function for Q&A from data and web search ---
108
def chat_with_data_and_web(api_key, base_url):
    """Render the chat page: file upload, RAG indexing, and the Q&A / web-search turn.

    The function is executed on every Streamlit script run. State that has to
    survive between runs is kept in ``st.session_state``:

    * ``chat_messages`` - list of ``{"role", "content"}`` dicts shown in the chat.
    * ``retriever`` - FAISS retriever built from the uploaded file.
    * ``rag_initialized_for_file`` - name of the file the retriever was built
      from, or a prefixed marker (``"empty_file_"``, ``"no_docs_"``,
      ``"no_splits_"``) when the file yielded nothing to index.
    * ``rag_attempted_for_file`` - name of the file whose processing has already
      been attempted, whatever the outcome; prevents reprocessing on each run.
    * ``uploader_key_suffix_counter`` - incremented to give the uploader a new
      widget key, which clears the selected file.

    Args:
        api_key: OpenAI API key handed to the embedding and chat models.
        base_url: API base URL handed to the embedding and chat models.
    """
    st.write('<div class="greeting-text">Hello! Welcome to Chatbot Excel.</div>', unsafe_allow_html=True)

    def clear_session_keys(*keys):
        """Delete each of *keys* from the session state if it is present."""
        for key in keys:
            if key in st.session_state:
                del st.session_state[key]

    # --- Function to completely reset chat and RAG state ---
    def reset_all_chat_and_rag_state():
        clear_session_keys('rag_initialized_for_file', 'rag_attempted_for_file', 'retriever', 'chat_messages')

        st.session_state.chat_messages = [{"role": "assistant", "content": "Hello! Upload a file to get started or ask me to search the web."}]

        # Reset file_uploader by changing its key
        if 'uploader_key_suffix_counter' not in st.session_state:
            st.session_state.uploader_key_suffix_counter = 0
        st.session_state.uploader_key_suffix_counter += 1

        st.rerun()

    # --- Function to reset RAG state on file change (without clearing chat history) ---
    def reset_rag_on_file_change():
        clear_session_keys('rag_initialized_for_file', 'rag_attempted_for_file', 'retriever')

        if "chat_messages" not in st.session_state:
            st.session_state.chat_messages = []
        st.session_state.chat_messages.append({
            "role": "assistant",
            "content": "New file detected. Preparing for Q&A..."
        })

    # --- "Start New Chat" button in sidebar ---
    if st.sidebar.button("Start New Chat", key="new_chat_btn"):
        reset_all_chat_and_rag_state()

    # Initialize uploader key counter if not exists
    if 'uploader_key_suffix_counter' not in st.session_state:
        st.session_state.uploader_key_suffix_counter = 0

    current_uploader_key = f"main_file_uploader_{st.session_state.uploader_key_suffix_counter}"

    uploaded_file = st.file_uploader(
        "Upload Excel or CSV file (optional for web search):",
        type=["xlsx", "csv"],
        key=current_uploader_key,
        on_change=reset_rag_on_file_change
    )

    if "chat_messages" not in st.session_state:
        st.session_state.chat_messages = [{"role": "assistant", "content": "Upload a file to ask questions about its content, or ask me to search the web."}]

    for msg in st.session_state.chat_messages:
        with st.chat_message(msg["role"]):
            st.markdown(msg["content"])

    # --- File processing logic ---
    # Guard on the "attempted" marker, not on rag_initialized_for_file: the
    # empty-file and error paths never store the plain file name there, so
    # guarding on it reprocessed the file and called st.rerun() on every run,
    # looping forever and appending a new chat message each time.
    if uploaded_file is not None and st.session_state.get('rag_attempted_for_file') != uploaded_file.name:
        st.session_state.rag_attempted_for_file = uploaded_file.name
        with st.spinner(f"Processing file {uploaded_file.name} for Q&A..."):
            tmp_file_path = None
            try:
                with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(uploaded_file.name)[1]) as tmp_file:
                    tmp_file.write(uploaded_file.getvalue())
                    tmp_file_path = tmp_file.name

                # Compare extensions case-insensitively so "DATA.XLSX" is accepted.
                file_name_lower = uploaded_file.name.lower()
                if file_name_lower.endswith(".xlsx"):
                    df = pd.read_excel(tmp_file_path)
                elif file_name_lower.endswith(".csv"):
                    df = pd.read_csv(tmp_file_path, encoding='utf-8')
                else:
                    st.error("File type not supported.")
                    # No rerun happens here, so drop the marker as well: the
                    # error keeps being shown while this file stays selected.
                    clear_session_keys('rag_initialized_for_file', 'rag_attempted_for_file', 'retriever')
                    return

                st.write("### Data Preview (first 5 rows):")
                st.dataframe(df.head())

                if df.empty:
                    st.warning("The uploaded file is empty. Q&A from file will not be effective.")
                    st.session_state.rag_initialized_for_file = "empty_file_" + uploaded_file.name
                    st.session_state.chat_messages.append({"role": "assistant", "content": f"File '{uploaded_file.name}' is empty. Would you like me to search the web?"})
                    st.rerun()
                    return

                # One text document per row: "Row <idx>: col: value, ..." (NaN cells skipped).
                documents_for_rag = [f"Row {idx}: " + ", ".join([f"{col}: {str(val)}" for col, val in row.items() if pd.notna(val)]) for idx, row in df.iterrows()]

                if not documents_for_rag:
                    st.warning("No data extracted from file for Q&A.")
                    st.session_state.rag_initialized_for_file = "no_docs_" + uploaded_file.name
                    st.session_state.chat_messages.append({"role": "assistant", "content": f"No data extracted from file '{uploaded_file.name}' for processing."})
                    st.rerun()
                    return

                text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
                all_splits = text_splitter.create_documents(documents_for_rag)

                if not all_splits:
                    st.warning("Text splitting resulted in no chunks. File may be too small for Q&A.")
                    st.session_state.rag_initialized_for_file = "no_splits_" + uploaded_file.name
                    st.session_state.chat_messages.append({"role": "assistant", "content": f"File '{uploaded_file.name}' was too small for splitting and processing."})
                    st.rerun()
                    return

                embeddings_model = OpenAIEmbeddings(
                    openai_api_key=api_key,
                    model="text-embedding-3-small",
                    base_url=base_url
                )
                vectorstore = FAISS.from_documents(all_splits, embeddings_model)
                st.session_state.retriever = vectorstore.as_retriever(search_kwargs={"k": 5})
                st.session_state.rag_initialized_for_file = uploaded_file.name

                st.session_state.chat_messages.append({
                    "role": "assistant",
                    "content": f"File '{uploaded_file.name}' successfully processed. You can now ask questions about its content or request web searches."
                })
                st.rerun()

            except Exception as e:
                st.error(f"Error processing file for Q&A: {e}")
                # The attempted marker is kept on purpose: the failure is
                # reported once and retried only after a re-upload or new chat.
                clear_session_keys('rag_initialized_for_file', 'retriever')
                st.session_state.chat_messages.append({"role": "assistant", "content": f"Error processing file: {e}"})
                st.rerun()
            finally:
                if tmp_file_path and os.path.exists(tmp_file_path):
                    os.remove(tmp_file_path)

    if prompt := st.chat_input("Ask your question or tell me to search the web:"):
        st.session_state.chat_messages.append({"role": "user", "content": prompt})
        with st.chat_message("user"):
            st.markdown(prompt)

        with st.chat_message("assistant"):
            message_placeholder = st.empty()
            message_placeholder.markdown("Thinking...")

            # A prompt that starts with one of these keywords is routed to the
            # web-search agent; the first matching keyword is stripped off.
            search_keywords = ["search for", "look up", "find", "search", "web search"]
            prompt_lower = prompt.lower()
            matched_keyword = next((kw for kw in search_keywords if prompt_lower.startswith(kw)), None)
            is_search_request = matched_keyword is not None
            search_query = prompt[len(matched_keyword):].strip() if is_search_request else prompt

            response_text = ""
            try:
                if not api_key:
                    error_msg = "OpenAI API key not provided. Please enter it in the sidebar."
                    message_placeholder.error(error_msg)
                    st.session_state.chat_messages.append({"role": "assistant", "content": error_msg})
                    return

                if is_search_request and search_query:
                    message_placeholder.markdown(f"Searching the web for: '{search_query}' using agent...")
                    llm_for_agent = ChatOpenAI(
                        model="gpt-4o-mini",
                        temperature=0.7,
                        openai_api_key=api_key,
                        base_url=base_url
                    )
                    tools = [
                        Tool(
                            name="BingSearch",
                            func=bing_search_tool_function,
                            description="Search the web using Bing and provide detailed results"
                        )
                    ]
                    # initialize_agent selects the agent through `agent=`; the
                    # former `agent_type=` keyword is not one of its parameters,
                    # so the default zero-shot ReAct agent was what actually ran.
                    # It is requested explicitly here; the docstore agent would
                    # not accept a single tool named "BingSearch".
                    agent = initialize_agent(
                        tools,
                        llm_for_agent,
                        agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
                        verbose=True,
                        handle_parsing_errors=True
                    )
                    try:
                        response_text = agent.run(search_query)
                    except Exception as agent_exc:
                        st.error(f"Error running search agent: {agent_exc}")
                        response_text = f"Unfortunately, there was a problem processing the web search: {agent_exc}"

                elif uploaded_file and 'retriever' in st.session_state and st.session_state.get('rag_initialized_for_file') == uploaded_file.name:
                    retriever = st.session_state.retriever

                    # Only the file-Q&A branch uses this model, so it is built here.
                    llm_rag = ChatOpenAI(
                        model="gpt-4o-mini",
                        temperature=0.2,
                        openai_api_key=api_key,
                        base_url=base_url
                    )

                    rag_prompt_template = """Based on the previous conversation history and the text below extracted from the uploaded file, answer the user's question.
If the information is not available in the text, state that the information was not found in the provided data. Do not make up an answer.

Conversation History:
{chat_history}

Extracted Text:
{context}

Current User Question: {question}

Answer:"""
                    rag_prompt = PromptTemplate.from_template(rag_prompt_template)

                    def format_docs(docs):
                        return "\n\n".join(doc.page_content for doc in docs)

                    # The question feeds both the retriever (-> context) and the prompt.
                    rag_chain = (
                        {
                            "context": itemgetter("question") | retriever | format_docs,
                            "question": itemgetter("question"),
                            "chat_history": itemgetter("chat_history")
                        }
                        | rag_prompt
                        | llm_rag
                        | StrOutputParser()
                    )

                    formatted_history = format_chat_history(st.session_state.chat_messages)

                    response_text = rag_chain.invoke({
                        "question": prompt,
                        "chat_history": formatted_history
                    })

                elif uploaded_file and st.session_state.get('rag_initialized_for_file') != uploaded_file.name:
                    response_text = f"File '{uploaded_file.name}' is still being processed or encountered an issue. Please wait or re-upload. You can also ask me to search the web."
                elif not uploaded_file and not is_search_request:
                    response_text = "Please upload a file to ask questions about its content, or ask me to search the web (e.g., 'search for...')."
                elif not search_query and is_search_request:
                    response_text = "It seems you wanted to search, but didn't specify what to search for. Please try again, e.g., 'search for latest tech news'."

                message_placeholder.markdown(response_text)
                st.session_state.chat_messages.append({"role": "assistant", "content": response_text})

            except Exception as e:
                error_msg = f"An error occurred: {e}"
                st.error(error_msg)
                if not response_text:
                    message_placeholder.markdown(f"Unfortunately, something went wrong: {e}")
                    st.session_state.chat_messages.append({"role": "assistant", "content": f"Unfortunately, something went wrong: {e}"})

    elif not uploaded_file and len(st.session_state.get("chat_messages", [])) <= 1:
        st.info("Upload an Excel or CSV file to chat about its data, or tell me to search the web.")
356
+
357
+ # --- Main UI Layout ---
358
def main():
    """Draw the title and sidebar inputs, then open the chat page once configured.

    The chat page is rendered only when both the API key and the base URL have
    been entered; otherwise the user is asked for them in the sidebar and in
    the main area.
    """
    st.sidebar.info("Ask questions about uploaded data or request web searches.")
    st.title("Chatbot Excel")

    entered_key = st.sidebar.text_input(
        "Enter your OpenAI API Key:",
        type="password",
        key="main_api_key_input",
    )
    entered_base_url = st.sidebar.text_input("Enter API Base URL:", key="base_url_input")

    # Guard clause: without both credentials there is nothing to run yet.
    if not (entered_key and entered_base_url):
        st.sidebar.warning("Please enter your OpenAI API key and base URL to continue.")
        st.info("Please enter your OpenAI API key and base URL in the sidebar to use the application features.")
        return

    chat_with_data_and_web(entered_key, entered_base_url)


if __name__ == "__main__":
    main()