MahatirTusher committed on
Commit
a2f22ef
·
verified ·
1 Parent(s): 85a4e5f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +87 -73
app.py CHANGED
@@ -91,19 +91,21 @@ st.markdown("""
91
  # Set Streamlit app title
92
  st.title("WebChatter πŸ’¬")
93
 
94
- # Initialize session state for FAISS index
95
  if "index_created" not in st.session_state:
96
  st.session_state.index_created = False
 
 
97
 
98
  # Sidebar for URL input
99
  with st.sidebar:
100
  st.header("Enter Web URL")
101
  url = st.text_input("URL", placeholder="e.g., https://www.bbc.com/news/science-environment-67299122")
102
- urls = [url] if url else []
103
  process_url_clicked = st.button("Process URL")
104
 
105
- # Placeholder for main content
106
- main_placeholder = st.empty()
107
 
108
  # Initialize the Groq LLM
109
  llm = ChatGroq(
@@ -117,80 +119,92 @@ def save_faiss_index(vectorstore, path):
117
  def load_faiss_index(path, embeddings):
118
  return FAISS.load_local(path, embeddings, allow_dangerous_deserialization=True)
119
 
 
120
  if process_url_clicked:
121
- if not urls:
122
- main_placeholder.error("Please provide a valid URL.")
123
- else:
124
- try:
125
- main_placeholder.text("Data Loading...Started...βœ…βœ…βœ…")
126
- loader = WebBaseLoader(
127
- web_paths=urls,
128
- bs_kwargs={"parse_only": ["title", "p", "h1", "h2", "h3"]},
129
- requests_kwargs={"headers": {"User-Agent": "Mozilla/5.0"}}
130
- )
131
- data = loader.load()
132
-
133
- # Check loaded data
134
- if not data or all(len(doc.page_content.strip()) == 0 for doc in data):
135
- main_placeholder.error("No content loaded from URL. Try a different URL (e.g., https://www.bbc.com/news/science-environment-67299122).")
136
- st.stop()
137
-
138
- main_placeholder.text("Text Splitter...Started...βœ…βœ…βœ…")
139
- text_splitter = RecursiveCharacterTextSplitter(
140
- separators=['\n\n', '\n', '.', ','],
141
- chunk_size=1000
142
- )
143
- docs = text_splitter.split_documents(data)
144
-
145
- # Check document count
146
- if not docs:
147
- main_placeholder.error("No document chunks created. Try a different URL.")
148
- st.stop()
149
- main_placeholder.text(f"Split into {len(docs)} document chunks.")
150
-
151
- main_placeholder.text("Embedding Vector Started Building...βœ…βœ…βœ…")
152
- embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
153
- vectorstore_openai = FAISS.from_documents(docs, embeddings)
154
-
155
- save_faiss_index(vectorstore_openai, faiss_index_path)
156
- st.session_state.index_created = True
157
- main_placeholder.text("FAISS index saved successfully! βœ…βœ…βœ…")
158
- time.sleep(2)
159
- main_placeholder.empty()
160
- except Exception as e:
161
- main_placeholder.error(f"Error processing URL: {str(e)}")
 
 
 
 
 
 
 
 
 
 
162
 
163
  # Query input with Ask button
164
- with main_placeholder.container():
165
  st.header("Ask a Question")
166
  query = st.text_input("Question", placeholder="e.g., What is the article about?")
167
  ask_clicked = st.button("Ask")
168
 
169
  if ask_clicked and query:
170
- if not st.session_state.index_created or not os.path.exists(faiss_index_path):
171
- main_placeholder.error("No FAISS index found. Please process a URL first.")
172
- else:
173
- with st.spinner("Processing your question..."):
174
- try:
175
- embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
176
- vectorstore = load_faiss_index(faiss_index_path, embeddings)
177
- chain = RetrievalQAWithSourcesChain.from_llm(llm=llm, retriever=vectorstore.as_retriever())
178
- result = chain({"question": query}, return_only_outputs=True)
179
-
180
- if not result.get("answer"):
181
- main_placeholder.warning("No answer generated. Try a different question or URL.")
182
- st.stop()
183
-
184
- st.header("Answer")
185
- st.write(result["answer"])
186
-
187
- sources = result.get("sources", "")
188
- if sources:
189
- st.subheader("Sources:")
190
- sources_list = sources.split("\n")
191
- for source in sources_list:
192
- st.write(source)
193
- else:
194
- st.write("No sources found.")
195
- except Exception as e:
196
- main_placeholder.error(f"Error answering query: {str(e)}")
 
 
91
  # Set Streamlit app title
92
  st.title("WebChatter πŸ’¬")
93
 
94
+ # Initialize session state for FAISS index and processing status
95
  if "index_created" not in st.session_state:
96
  st.session_state.index_created = False
97
+ if "processing_status" not in st.session_state:
98
+ st.session_state.processing_status = ""
99
 
100
  # Sidebar for URL input
101
  with st.sidebar:
102
  st.header("Enter Web URL")
103
  url = st.text_input("URL", placeholder="e.g., https://www.bbc.com/news/science-environment-67299122")
104
+ urls = [url.strip()] if url.strip() else []
105
  process_url_clicked = st.button("Process URL")
106
 
107
+ # Main content container
108
+ main_container = st.container()
109
 
110
  # Initialize the Groq LLM
111
  llm = ChatGroq(
 
119
  def load_faiss_index(path, embeddings):
120
  return FAISS.load_local(path, embeddings, allow_dangerous_deserialization=True)
121
 
122
+ # Process URL
123
  if process_url_clicked:
124
+ with main_container:
125
+ if not urls:
126
+ st.error("Please provide a valid URL.")
127
+ else:
128
+ try:
129
+ st.session_state.processing_status = "Data Loading...Started...βœ…βœ…βœ…"
130
+ st.text(st.session_state.processing_status)
131
+ loader = WebBaseLoader(
132
+ web_paths=urls,
133
+ bs_kwargs={"parse_only": ["title", "p", "h1", "h2", "h3"]},
134
+ requests_kwargs={"headers": {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"}}
135
+ )
136
+ data = loader.load()
137
+
138
+ # Check loaded data
139
+ if not data or all(len(doc.page_content.strip()) == 0 for doc in data):
140
+ st.error("No content loaded from URL. Try a different URL (e.g., https://www.bbc.com/news/science-environment-67299122).")
141
+ st.session_state.processing_status = ""
142
+ st.stop()
143
+
144
+ st.session_state.processing_status = "Text Splitter...Started...βœ…βœ…βœ…"
145
+ st.text(st.session_state.processing_status)
146
+ text_splitter = RecursiveCharacterTextSplitter(
147
+ separators=['\n\n', '\n', '.', ','],
148
+ chunk_size=1000
149
+ )
150
+ docs = text_splitter.split_documents(data)
151
+
152
+ # Check document count
153
+ if not docs:
154
+ st.error("No document chunks created. Try a different URL.")
155
+ st.session_state.processing_status = ""
156
+ st.stop()
157
+ st.session_state.processing_status = f"Split into {len(docs)} document chunks."
158
+ st.text(st.session_state.processing_status)
159
+
160
+ st.session_state.processing_status = "Embedding Vector Started Building...βœ…βœ…βœ…"
161
+ st.text(st.session_state.processing_status)
162
+ embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
163
+ vectorstore_openai = FAISS.from_documents(docs, embeddings)
164
+
165
+ save_faiss_index(vectorstore_openai, faiss_index_path)
166
+ st.session_state.index_created = True
167
+ st.session_state.processing_status = "FAISS index saved successfully! βœ…βœ…βœ…"
168
+ st.text(st.session_state.processing_status)
169
+ time.sleep(2)
170
+ st.session_state.processing_status = ""
171
+ st.experimental_rerun() # Refresh to clear status messages
172
+ except Exception as e:
173
+ st.error(f"Error processing URL: {str(e)}")
174
+ st.session_state.processing_status = ""
175
 
176
  # Query input with Ask button
177
+ with main_container:
178
  st.header("Ask a Question")
179
  query = st.text_input("Question", placeholder="e.g., What is the article about?")
180
  ask_clicked = st.button("Ask")
181
 
182
  if ask_clicked and query:
183
+ with main_container:
184
+ if not st.session_state.index_created or not os.path.exists(faiss_index_path):
185
+ st.error("No FAISS index found. Please process a URL first.")
186
+ else:
187
+ with st.spinner("Processing your question..."):
188
+ try:
189
+ embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
190
+ vectorstore = load_faiss_index(faiss_index_path, embeddings)
191
+ chain = RetrievalQAWithSourcesChain.from_llm(llm=llm, retriever=vectorstore.as_retriever())
192
+ result = chain({"question": query}, return_only_outputs=True)
193
+
194
+ if not result.get("answer"):
195
+ st.warning("No answer generated. Try a different question or URL.")
196
+ st.stop()
197
+
198
+ st.header("Answer")
199
+ st.write(result["answer"])
200
+
201
+ sources = result.get("sources", "")
202
+ if sources:
203
+ st.subheader("Sources:")
204
+ sources_list = sources.split("\n")
205
+ for source in sources_list:
206
+ st.write(source)
207
+ else:
208
+ st.write("No sources found.")
209
+ except Exception as e:
210
+ st.error(f"Error answering query: {str(e)}")