Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -4,6 +4,7 @@ import gradio as gr
|
|
| 4 |
import os
|
| 5 |
from openai import OpenAI
|
| 6 |
from selenium import webdriver
|
|
|
|
| 7 |
from selenium.webdriver.chrome.options import Options
|
| 8 |
|
| 9 |
# Initialize OpenAI client securely
|
|
@@ -35,13 +36,16 @@ def fetch_with_requests(url):
|
|
| 35 |
|
| 36 |
def fetch_with_selenium(url):
|
| 37 |
"""
|
| 38 |
-
Uses Selenium to scrape JavaScript-heavy pages
|
| 39 |
"""
|
| 40 |
chrome_options = Options()
|
| 41 |
chrome_options.add_argument("--headless") # Run in headless mode
|
| 42 |
-
chrome_options.add_argument("--disable-
|
| 43 |
-
|
| 44 |
-
|
|
|
|
|
|
|
|
|
|
| 45 |
driver.get(url)
|
| 46 |
html = driver.page_source
|
| 47 |
driver.quit()
|
|
|
|
| 4 |
import os
|
| 5 |
from openai import OpenAI
|
| 6 |
from selenium import webdriver
|
| 7 |
+
import undetected_chromedriver as uc
|
| 8 |
from selenium.webdriver.chrome.options import Options
|
| 9 |
|
| 10 |
# Initialize OpenAI client securely
|
|
|
|
| 36 |
|
| 37 |
def fetch_with_selenium(url):
|
| 38 |
"""
|
| 39 |
+
Uses Selenium with an undetected Chrome driver to scrape JavaScript-heavy pages.
|
| 40 |
"""
|
| 41 |
chrome_options = Options()
|
| 42 |
chrome_options.add_argument("--headless") # Run in headless mode
|
| 43 |
+
chrome_options.add_argument("--disable-gpu")
|
| 44 |
+
chrome_options.add_argument("--no-sandbox")
|
| 45 |
+
chrome_options.add_argument("--disable-dev-shm-usage")
|
| 46 |
+
|
| 47 |
+
driver = uc.Chrome(options=chrome_options)
|
| 48 |
+
|
| 49 |
driver.get(url)
|
| 50 |
html = driver.page_source
|
| 51 |
driver.quit()
|