tdurzynski committed on
Commit
5892725
·
verified ·
1 Parent(s): 9dd964d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -4
app.py CHANGED
@@ -4,6 +4,7 @@ import gradio as gr
4
  import os
5
  from openai import OpenAI
6
  from selenium import webdriver
 
7
  from selenium.webdriver.chrome.options import Options
8
 
9
  # Initialize OpenAI client securely
@@ -35,13 +36,16 @@ def fetch_with_requests(url):
35
 
36
  def fetch_with_selenium(url):
37
  """
38
- Uses Selenium to scrape JavaScript-heavy pages if requests fails.
39
  """
40
  chrome_options = Options()
41
  chrome_options.add_argument("--headless") # Run in headless mode
42
- chrome_options.add_argument("--disable-blink-features=AutomationControlled")
43
-
44
- driver = webdriver.Chrome(options=chrome_options)
 
 
 
45
  driver.get(url)
46
  html = driver.page_source
47
  driver.quit()
 
4
  import os
5
  from openai import OpenAI
6
  from selenium import webdriver
7
+ import undetected_chromedriver as uc
8
  from selenium.webdriver.chrome.options import Options
9
 
10
  # Initialize OpenAI client securely
 
36
 
37
  def fetch_with_selenium(url):
38
  """
39
+ Uses Selenium with an undetected Chrome driver to scrape JavaScript-heavy pages.
40
  """
41
  chrome_options = Options()
42
  chrome_options.add_argument("--headless") # Run in headless mode
43
+ chrome_options.add_argument("--disable-gpu")
44
+ chrome_options.add_argument("--no-sandbox")
45
+ chrome_options.add_argument("--disable-dev-shm-usage")
46
+
47
+ driver = uc.Chrome(options=chrome_options)
48
+
49
  driver.get(url)
50
  html = driver.page_source
51
  driver.quit()