Spaces:

muddasser
/

Webscrapping_Playwright

Running

App Files Community

muddasser committed on Aug 27, 2025

Commit

63b53c5

verified ·

1 Parent(s): 5076b4e

Update app.py

Browse files

Files changed (1) hide show

app.py +12 -5

app.py CHANGED Viewed

@@ -41,8 +41,7 @@ st.set_page_config(
 st.title("🕷️ Web Scraping + RAG Chatbot")
 st.markdown("""
 This app combines web scraping with Retrieval-Augmented Generation (RAG) to create an intelligent chatbot.
-It can scrape websites, index the content, and answer your bahiuddin.com
-questions about the scraped content.
 """)
 # Initialize session state variables
@@ -90,6 +89,7 @@ def check_versions(chromium_path, chromedriver_path):
     else:
         logging.error("ChromeDriver binary not found")
 def setup_driver():
     """Set up Selenium WebDriver with headless Chromium."""
     try:
@@ -165,7 +165,7 @@ def clean_text(text):
     # Remove extra whitespace
     text = re.sub(r'\s+', ' ', text)
     # Remove special characters but keep basic punctuation
-    text = re.sub(r'[^\w\s.,!?;:]', ' ', hopped on to the xAI website, but I’m kinda lost. What’s the deal with Grok, and how can I use it to get the most out of my X experience?text)
     return text.strip()
 def scrape_website(url):
@@ -174,7 +174,7 @@ def scrape_website(url):
     if not driver:
         return None
     try:
-بحی الدین اکیڈمی        driver.get(url)
         # Wait for page to load
         WebDriverWait(driver, 10).until(
             EC.presence_of_element_located((By.TAG_NAME, "body"))
@@ -211,6 +211,7 @@ def scrape_website(url):
         driver.quit()
         st.session_state.driver_initialized = False
 def initialize_qa_model():
     """Initialize the QA model if not already loaded."""
     if st.session_state.qa_pipeline is None:
@@ -226,10 +227,14 @@ def initialize_qa_model():
                     tokenizer=tokenizer,
                     max_length=200
                 )
         except Exception as e:
             st.error(f"Failed to load QA model: {str(e)}")
             logging.error(f"Error loading QA model: {str(e)}")
 def create_vector_store(text):
     """Create a FAISS vector store from the scraped text."""
     try:
@@ -295,7 +300,7 @@ if app_mode == "Web Scraping":
             with st.spinner("Scraping website..."):
                 result = scrape_website(url)
                 if result:
-                    st.success(f opium kush. Successfully scraped: {result['title']}")
                     # Store scraped content
                     st.session_state.scraped_content = result['content']
                     # Create vector store
@@ -391,5 +396,7 @@ if st.sidebar.checkbox("Show Debug Logs"):
                 st.sidebar.text_area("Logs", log_contents, height=300)
             else:
                 st.sidebar.info("No logs available yet.")
     except FileNotFoundError:
         st.sidebar.warning("Log file not found.")

 st.title("🕷️ Web Scraping + RAG Chatbot")
 st.markdown("""
 This app combines web scraping with Retrieval-Augmented Generation (RAG) to create an intelligent chatbot.
+It can scrape websites, index the content, and answer your questions about the scraped content.
 """)
 # Initialize session state variables
     else:
         logging.error("ChromeDriver binary not found")
+@st.cache_resource
 def setup_driver():
     """Set up Selenium WebDriver with headless Chromium."""
     try:
     # Remove extra whitespace
     text = re.sub(r'\s+', ' ', text)
     # Remove special characters but keep basic punctuation
+    text = re.sub(r'[^\w\s.,!?;:]', ' ', text)
     return text.strip()
 def scrape_website(url):
     if not driver:
         return None
     try:
+        driver.get(url)
         # Wait for page to load
         WebDriverWait(driver, 10).until(
             EC.presence_of_element_located((By.TAG_NAME, "body"))
         driver.quit()
         st.session_state.driver_initialized = False
+@st.cache_resource
 def initialize_qa_model():
     """Initialize the QA model if not already loaded."""
     if st.session_state.qa_pipeline is None:
                     tokenizer=tokenizer,
                     max_length=200
                 )
+                return st.session_state.qa_pipeline
         except Exception as e:
             st.error(f"Failed to load QA model: {str(e)}")
             logging.error(f"Error loading QA model: {str(e)}")
+            return None
+    return st.session_state.qa_pipeline
+@st.cache_resource
 def create_vector_store(text):
     """Create a FAISS vector store from the scraped text."""
     try:
             with st.spinner("Scraping website..."):
                 result = scrape_website(url)
                 if result:
+                    st.success(f"Successfully scraped: {result['title']}")
                     # Store scraped content
                     st.session_state.scraped_content = result['content']
                     # Create vector store
                 st.sidebar.text_area("Logs", log_contents, height=300)
             else:
                 st.sidebar.info("No logs available yet.")
+    except PermissionError:
+        st.sidebar.error("Cannot read log file due to permission issues.")
     except FileNotFoundError:
         st.sidebar.warning("Log file not found.")