muddasser committed on
Commit
49c5926
·
verified ·
1 Parent(s): 42df0a5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -9
app.py CHANGED
@@ -4,6 +4,12 @@ from selenium.webdriver.chrome.options import Options
4
  from selenium.webdriver.chrome.service import Service
5
  from webdriver_manager.chrome import ChromeDriverManager
6
  import time
 
 
 
 
 
 
7
  # Fix huggingface_hub cached_download
8
  import huggingface_hub
9
  if not hasattr(huggingface_hub, "cached_download"):
@@ -21,14 +27,21 @@ def scrape_with_selenium(url: str):
21
  chrome_options.add_argument("--no-sandbox")
22
  chrome_options.add_argument("--disable-dev-shm-usage")
23
  chrome_options.binary_location = "/usr/bin/chromium"
24
- service = Service(ChromeDriverManager().install())
25
- driver = webdriver.Chrome(service=service, options=chrome_options)
26
- driver.get(url)
27
- time.sleep(2)
28
- paragraphs = driver.find_elements("tag name", "p")
29
- text_data = [p.text for p in paragraphs if p.text.strip()]
30
- driver.quit()
31
- return text_data
 
 
 
 
 
 
 
32
 
33
  # 2️⃣ Build FAISS Index
34
  def build_faiss_index(text_data):
@@ -74,4 +87,4 @@ if "scraped_text" in st.session_state:
74
  st.subheader("💡 Answer")
75
  st.write(answer)
76
  except Exception as e:
77
- st.error(f"Error processing question: {str(e)}")
 
4
  from selenium.webdriver.chrome.service import Service
5
  from webdriver_manager.chrome import ChromeDriverManager
6
  import time
7
+ import logging
8
+
9
+ # Set up logging
10
+ logging.basicConfig(level=logging.INFO)
11
+ logger = logging.getLogger(__name__)
12
+
13
  # Fix huggingface_hub cached_download
14
  import huggingface_hub
15
  if not hasattr(huggingface_hub, "cached_download"):
 
27
  chrome_options.add_argument("--no-sandbox")
28
  chrome_options.add_argument("--disable-dev-shm-usage")
29
  chrome_options.binary_location = "/usr/bin/chromium"
30
+ try:
31
+ # Use webdriver-manager to get the correct ChromeDriver
32
+ logger.info("Installing ChromeDriver with webdriver-manager")
33
+ service = Service(ChromeDriverManager(driver_version=None).install())
34
+ driver = webdriver.Chrome(service=service, options=chrome_options)
35
+ logger.info(f"Navigating to {url}")
36
+ driver.get(url)
37
+ time.sleep(2)
38
+ paragraphs = driver.find_elements("tag name", "p")
39
+ text_data = [p.text for p in paragraphs if p.text.strip()]
40
+ driver.quit()
41
+ return text_data
42
+ except Exception as e:
43
+ logger.error(f"Selenium error: {str(e)}")
44
+ raise
45
 
46
  # 2️⃣ Build FAISS Index
47
  def build_faiss_index(text_data):
 
87
  st.subheader("💡 Answer")
88
  st.write(answer)
89
  except Exception as e:
90
+ st.error(f"Error processing question: {str(e)}")