Spaces:

MahatirTusher
/

WebChatter

Sleeping

App Files Files Community

MahatirTusher committed on Apr 22, 2025

Commit

a2f22ef

verified ·

1 Parent(s): 85a4e5f

Update app.py

Browse files

Files changed (1) hide show

app.py +87 -73

app.py CHANGED Viewed

@@ -91,19 +91,21 @@ st.markdown("""
 # Set Streamlit app title
 st.title("WebChatter 💬")
-# Initialize session state for FAISS index
 if "index_created" not in st.session_state:
     st.session_state.index_created = False
 # Sidebar for URL input
 with st.sidebar:
     st.header("Enter Web URL")
     url = st.text_input("URL", placeholder="e.g., https://www.bbc.com/news/science-environment-67299122")
-    urls = [url] if url else []
     process_url_clicked = st.button("Process URL")
-# Placeholder for main content
-main_placeholder = st.empty()
 # Initialize the Groq LLM
 llm = ChatGroq(
@@ -117,80 +119,92 @@ def save_faiss_index(vectorstore, path):
 def load_faiss_index(path, embeddings):
     return FAISS.load_local(path, embeddings, allow_dangerous_deserialization=True)
 if process_url_clicked:
-    if not urls:
-        main_placeholder.error("Please provide a valid URL.")
-    else:
-        try:
-            main_placeholder.text("Data Loading...Started...✅✅✅")
-            loader = WebBaseLoader(
-                web_paths=urls,
-                bs_kwargs={"parse_only": ["title", "p", "h1", "h2", "h3"]},
-                requests_kwargs={"headers": {"User-Agent": "Mozilla/5.0"}}
-            )
-            data = loader.load()
-            # Check loaded data
-            if not data or all(len(doc.page_content.strip()) == 0 for doc in data):
-                main_placeholder.error("No content loaded from URL. Try a different URL (e.g., https://www.bbc.com/news/science-environment-67299122).")
-                st.stop()
-            main_placeholder.text("Text Splitter...Started...✅✅✅")
-            text_splitter = RecursiveCharacterTextSplitter(
-                separators=['\n\n', '\n', '.', ','],
-                chunk_size=1000
-            )
-            docs = text_splitter.split_documents(data)
-            # Check document count
-            if not docs:
-                main_placeholder.error("No document chunks created. Try a different URL.")
-                st.stop()
-            main_placeholder.text(f"Split into {len(docs)} document chunks.")
-            main_placeholder.text("Embedding Vector Started Building...✅✅✅")
-            embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
-            vectorstore_openai = FAISS.from_documents(docs, embeddings)
-            save_faiss_index(vectorstore_openai, faiss_index_path)
-            st.session_state.index_created = True
-            main_placeholder.text("FAISS index saved successfully! ✅✅✅")
-            time.sleep(2)
-            main_placeholder.empty()
-        except Exception as e:
-            main_placeholder.error(f"Error processing URL: {str(e)}")
 # Query input with Ask button
-with main_placeholder.container():
     st.header("Ask a Question")
     query = st.text_input("Question", placeholder="e.g., What is the article about?")
     ask_clicked = st.button("Ask")
 if ask_clicked and query:
-    if not st.session_state.index_created or not os.path.exists(faiss_index_path):
-        main_placeholder.error("No FAISS index found. Please process a URL first.")
-    else:
-        with st.spinner("Processing your question..."):
-            try:
-                embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
-                vectorstore = load_faiss_index(faiss_index_path, embeddings)
-                chain = RetrievalQAWithSourcesChain.from_llm(llm=llm, retriever=vectorstore.as_retriever())
-                result = chain({"question": query}, return_only_outputs=True)
-                if not result.get("answer"):
-                    main_placeholder.warning("No answer generated. Try a different question or URL.")
-                    st.stop()
-                st.header("Answer")
-                st.write(result["answer"])
-                sources = result.get("sources", "")
-                if sources:
-                    st.subheader("Sources:")
-                    sources_list = sources.split("\n")
-                    for source in sources_list:
-                        st.write(source)
-                else:
-                    st.write("No sources found.")
-            except Exception as e:
-                main_placeholder.error(f"Error answering query: {str(e)}")

 # Set Streamlit app title
 st.title("WebChatter 💬")
+# Initialize session state for FAISS index and processing status
 if "index_created" not in st.session_state:
     st.session_state.index_created = False
+if "processing_status" not in st.session_state:
+    st.session_state.processing_status = ""
 # Sidebar for URL input
 with st.sidebar:
     st.header("Enter Web URL")
     url = st.text_input("URL", placeholder="e.g., https://www.bbc.com/news/science-environment-67299122")
+    urls = [url.strip()] if url.strip() else []
     process_url_clicked = st.button("Process URL")
+# Main content container
+main_container = st.container()
 # Initialize the Groq LLM
 llm = ChatGroq(
 def load_faiss_index(path, embeddings):
     return FAISS.load_local(path, embeddings, allow_dangerous_deserialization=True)
+# Process URL
 if process_url_clicked:
+    with main_container:
+        if not urls:
+            st.error("Please provide a valid URL.")
+        else:
+            try:
+                st.session_state.processing_status = "Data Loading...Started...✅✅✅"
+                st.text(st.session_state.processing_status)
+                loader = WebBaseLoader(
+                    web_paths=urls,
+                    bs_kwargs={"parse_only": ["title", "p", "h1", "h2", "h3"]},
+                    requests_kwargs={"headers": {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"}}
+                )
+                data = loader.load()
+                # Check loaded data
+                if not data or all(len(doc.page_content.strip()) == 0 for doc in data):
+                    st.error("No content loaded from URL. Try a different URL (e.g., https://www.bbc.com/news/science-environment-67299122).")
+                    st.session_state.processing_status = ""
+                    st.stop()
+                st.session_state.processing_status = "Text Splitter...Started...✅✅✅"
+                st.text(st.session_state.processing_status)
+                text_splitter = RecursiveCharacterTextSplitter(
+                    separators=['\n\n', '\n', '.', ','],
+                    chunk_size=1000
+                )
+                docs = text_splitter.split_documents(data)
+                # Check document count
+                if not docs:
+                    st.error("No document chunks created. Try a different URL.")
+                    st.session_state.processing_status = ""
+                    st.stop()
+                st.session_state.processing_status = f"Split into {len(docs)} document chunks."
+                st.text(st.session_state.processing_status)
+                st.session_state.processing_status = "Embedding Vector Started Building...✅✅✅"
+                st.text(st.session_state.processing_status)
+                embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
+                vectorstore_openai = FAISS.from_documents(docs, embeddings)
+                save_faiss_index(vectorstore_openai, faiss_index_path)
+                st.session_state.index_created = True
+                st.session_state.processing_status = "FAISS index saved successfully! ✅✅✅"
+                st.text(st.session_state.processing_status)
+                time.sleep(2)
+                st.session_state.processing_status = ""
+                st.experimental_rerun()  # Refresh to clear status messages
+            except Exception as e:
+                st.error(f"Error processing URL: {str(e)}")
+                st.session_state.processing_status = ""
 # Query input with Ask button
+with main_container:
     st.header("Ask a Question")
     query = st.text_input("Question", placeholder="e.g., What is the article about?")
     ask_clicked = st.button("Ask")
 if ask_clicked and query:
+    with main_container:
+        if not st.session_state.index_created or not os.path.exists(faiss_index_path):
+            st.error("No FAISS index found. Please process a URL first.")
+        else:
+            with st.spinner("Processing your question..."):
+                try:
+                    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
+                    vectorstore = load_faiss_index(faiss_index_path, embeddings)
+                    chain = RetrievalQAWithSourcesChain.from_llm(llm=llm, retriever=vectorstore.as_retriever())
+                    result = chain({"question": query}, return_only_outputs=True)
+                    if not result.get("answer"):
+                        st.warning("No answer generated. Try a different question or URL.")
+                        st.stop()
+                    st.header("Answer")
+                    st.write(result["answer"])
+                    sources = result.get("sources", "")
+                    if sources:
+                        st.subheader("Sources:")
+                        sources_list = sources.split("\n")
+                        for source in sources_list:
+                            st.write(source)
+                    else:
+                        st.write("No sources found.")
+                except Exception as e:
+                    st.error(f"Error answering query: {str(e)}")