Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -4,6 +4,12 @@ from selenium.webdriver.chrome.options import Options
|
|
| 4 |
from selenium.webdriver.chrome.service import Service
|
| 5 |
from webdriver_manager.chrome import ChromeDriverManager
|
| 6 |
import time
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
# Fix huggingface_hub cached_download
|
| 8 |
import huggingface_hub
|
| 9 |
if not hasattr(huggingface_hub, "cached_download"):
|
|
@@ -21,14 +27,21 @@ def scrape_with_selenium(url: str):
|
|
| 21 |
chrome_options.add_argument("--no-sandbox")
|
| 22 |
chrome_options.add_argument("--disable-dev-shm-usage")
|
| 23 |
chrome_options.binary_location = "/usr/bin/chromium"
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
|
| 33 |
# 2️⃣ Build FAISS Index
|
| 34 |
def build_faiss_index(text_data):
|
|
@@ -74,4 +87,4 @@ if "scraped_text" in st.session_state:
|
|
| 74 |
st.subheader("💡 Answer")
|
| 75 |
st.write(answer)
|
| 76 |
except Exception as e:
|
| 77 |
-
st.error(f"Error processing question: {str(e)}")
|
|
|
|
| 4 |
from selenium.webdriver.chrome.service import Service
|
| 5 |
from webdriver_manager.chrome import ChromeDriverManager
|
| 6 |
import time
|
| 7 |
+
import logging
|
| 8 |
+
|
| 9 |
+
# Set up logging
|
| 10 |
+
logging.basicConfig(level=logging.INFO)
|
| 11 |
+
logger = logging.getLogger(__name__)
|
| 12 |
+
|
| 13 |
# Fix huggingface_hub cached_download
|
| 14 |
import huggingface_hub
|
| 15 |
if not hasattr(huggingface_hub, "cached_download"):
|
|
|
|
| 27 |
chrome_options.add_argument("--no-sandbox")
|
| 28 |
chrome_options.add_argument("--disable-dev-shm-usage")
|
| 29 |
chrome_options.binary_location = "/usr/bin/chromium"
|
| 30 |
+
try:
|
| 31 |
+
# Use webdriver-manager to get the correct ChromeDriver
|
| 32 |
+
logger.info("Installing ChromeDriver with webdriver-manager")
|
| 33 |
+
service = Service(ChromeDriverManager(driver_version=None).install())
|
| 34 |
+
driver = webdriver.Chrome(service=service, options=chrome_options)
|
| 35 |
+
logger.info(f"Navigating to {url}")
|
| 36 |
+
driver.get(url)
|
| 37 |
+
time.sleep(2)
|
| 38 |
+
paragraphs = driver.find_elements("tag name", "p")
|
| 39 |
+
text_data = [p.text for p in paragraphs if p.text.strip()]
|
| 40 |
+
driver.quit()
|
| 41 |
+
return text_data
|
| 42 |
+
except Exception as e:
|
| 43 |
+
logger.error(f"Selenium error: {str(e)}")
|
| 44 |
+
raise
|
| 45 |
|
| 46 |
# 2️⃣ Build FAISS Index
|
| 47 |
def build_faiss_index(text_data):
|
|
|
|
| 87 |
st.subheader("💡 Answer")
|
| 88 |
st.write(answer)
|
| 89 |
except Exception as e:
|
| 90 |
+
st.error(f"Error processing question: {str(e)}")
|