muddasser committed on
Commit
63b53c5
·
verified ·
1 Parent(s): 5076b4e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -5
app.py CHANGED
@@ -41,8 +41,7 @@ st.set_page_config(
41
  st.title("🕷️ Web Scraping + RAG Chatbot")
42
  st.markdown("""
43
  This app combines web scraping with Retrieval-Augmented Generation (RAG) to create an intelligent chatbot.
44
- It can scrape websites, index the content, and answer your bahiuddin.com
45
- questions about the scraped content.
46
  """)
47
 
48
  # Initialize session state variables
@@ -90,6 +89,7 @@ def check_versions(chromium_path, chromedriver_path):
90
  else:
91
  logging.error("ChromeDriver binary not found")
92
 
 
93
  def setup_driver():
94
  """Set up Selenium WebDriver with headless Chromium."""
95
  try:
@@ -165,7 +165,7 @@ def clean_text(text):
165
  # Remove extra whitespace
166
  text = re.sub(r'\s+', ' ', text)
167
  # Remove special characters but keep basic punctuation
168
- text = re.sub(r'[^\w\s.,!?;:]', ' ', hopped on to the xAI website, but I’m kinda lost. What’s the deal with Grok, and how can I use it to get the most out of my X experience?text)
169
  return text.strip()
170
 
171
  def scrape_website(url):
@@ -174,7 +174,7 @@ def scrape_website(url):
174
  if not driver:
175
  return None
176
  try:
177
- بحی الدین اکیڈمی driver.get(url)
178
  # Wait for page to load
179
  WebDriverWait(driver, 10).until(
180
  EC.presence_of_element_located((By.TAG_NAME, "body"))
@@ -211,6 +211,7 @@ def scrape_website(url):
211
  driver.quit()
212
  st.session_state.driver_initialized = False
213
 
 
214
  def initialize_qa_model():
215
  """Initialize the QA model if not already loaded."""
216
  if st.session_state.qa_pipeline is None:
@@ -226,10 +227,14 @@ def initialize_qa_model():
226
  tokenizer=tokenizer,
227
  max_length=200
228
  )
 
229
  except Exception as e:
230
  st.error(f"Failed to load QA model: {str(e)}")
231
  logging.error(f"Error loading QA model: {str(e)}")
 
 
232
 
 
233
  def create_vector_store(text):
234
  """Create a FAISS vector store from the scraped text."""
235
  try:
@@ -295,7 +300,7 @@ if app_mode == "Web Scraping":
295
  with st.spinner("Scraping website..."):
296
  result = scrape_website(url)
297
  if result:
298
- st.success(f opium kush. Successfully scraped: {result['title']}")
299
  # Store scraped content
300
  st.session_state.scraped_content = result['content']
301
  # Create vector store
@@ -391,5 +396,7 @@ if st.sidebar.checkbox("Show Debug Logs"):
391
  st.sidebar.text_area("Logs", log_contents, height=300)
392
  else:
393
  st.sidebar.info("No logs available yet.")
 
 
394
  except FileNotFoundError:
395
  st.sidebar.warning("Log file not found.")
 
41
  st.title("🕷️ Web Scraping + RAG Chatbot")
42
  st.markdown("""
43
  This app combines web scraping with Retrieval-Augmented Generation (RAG) to create an intelligent chatbot.
44
+ It can scrape websites, index the content, and answer your questions about the scraped content.
 
45
  """)
46
 
47
  # Initialize session state variables
 
89
  else:
90
  logging.error("ChromeDriver binary not found")
91
 
92
+ @st.cache_resource
93
  def setup_driver():
94
  """Set up Selenium WebDriver with headless Chromium."""
95
  try:
 
165
  # Remove extra whitespace
166
  text = re.sub(r'\s+', ' ', text)
167
  # Remove special characters but keep basic punctuation
168
+ text = re.sub(r'[^\w\s.,!?;:]', ' ', text)
169
  return text.strip()
170
 
171
  def scrape_website(url):
 
174
  if not driver:
175
  return None
176
  try:
177
+ driver.get(url)
178
  # Wait for page to load
179
  WebDriverWait(driver, 10).until(
180
  EC.presence_of_element_located((By.TAG_NAME, "body"))
 
211
  driver.quit()
212
  st.session_state.driver_initialized = False
213
 
214
+ @st.cache_resource
215
  def initialize_qa_model():
216
  """Initialize the QA model if not already loaded."""
217
  if st.session_state.qa_pipeline is None:
 
227
  tokenizer=tokenizer,
228
  max_length=200
229
  )
230
+ return st.session_state.qa_pipeline
231
  except Exception as e:
232
  st.error(f"Failed to load QA model: {str(e)}")
233
  logging.error(f"Error loading QA model: {str(e)}")
234
+ return None
235
+ return st.session_state.qa_pipeline
236
 
237
+ @st.cache_resource
238
  def create_vector_store(text):
239
  """Create a FAISS vector store from the scraped text."""
240
  try:
 
300
  with st.spinner("Scraping website..."):
301
  result = scrape_website(url)
302
  if result:
303
+ st.success(f"Successfully scraped: {result['title']}")
304
  # Store scraped content
305
  st.session_state.scraped_content = result['content']
306
  # Create vector store
 
396
  st.sidebar.text_area("Logs", log_contents, height=300)
397
  else:
398
  st.sidebar.info("No logs available yet.")
399
+ except PermissionError:
400
+ st.sidebar.error("Cannot read log file due to permission issues.")
401
  except FileNotFoundError:
402
  st.sidebar.warning("Log file not found.")