Update app.py
Browse files
app.py
CHANGED
|
@@ -17,7 +17,7 @@ logging.basicConfig(filename='app.log', level=logging.DEBUG,
|
|
| 17 |
st.title("Web Scraper App")
|
| 18 |
|
| 19 |
def find_binary(binary_name):
|
| 20 |
-
"""Find binary path using 'find' command
|
| 21 |
try:
|
| 22 |
result = subprocess.check_output(
|
| 23 |
['find', '/', '-name', binary_name, '-type', 'f', '-executable'],
|
|
@@ -61,16 +61,42 @@ def check_versions(chromium_path, chromedriver_path):
|
|
| 61 |
def setup_driver():
|
| 62 |
"""Set up Selenium WebDriver with headless Chromium."""
|
| 63 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
# Search for binaries
|
| 65 |
-
chromium_path =
|
| 66 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
|
| 68 |
# Log versions for debugging
|
| 69 |
check_versions(chromium_path, chromedriver_path)
|
| 70 |
|
| 71 |
if not chromium_path:
|
| 72 |
logging.warning("No Chromium binary found, attempting to proceed with webdriver-manager")
|
| 73 |
-
chromium_path = '/usr/
|
| 74 |
|
| 75 |
# Configure Chrome options for headless Chromium in Docker
|
| 76 |
options = Options()
|
|
@@ -80,13 +106,16 @@ def setup_driver():
|
|
| 80 |
options.add_argument('--disable-gpu') # Disable GPU
|
| 81 |
options.binary_location = chromium_path
|
| 82 |
|
| 83 |
-
#
|
| 84 |
if chromedriver_path and os.path.exists(chromedriver_path):
|
|
|
|
| 85 |
logging.info(f"Using system ChromeDriver at {chromedriver_path}")
|
| 86 |
service = Service(chromedriver_path)
|
| 87 |
else:
|
| 88 |
logging.info("Falling back to webdriver-manager for ChromeDriver")
|
| 89 |
-
|
|
|
|
|
|
|
| 90 |
|
| 91 |
# Initialize WebDriver
|
| 92 |
driver = webdriver.Chrome(service=service, options=options)
|
|
|
|
| 17 |
st.title("Web Scraper App")
|
| 18 |
|
| 19 |
def find_binary(binary_name):
|
| 20 |
+
"""Find binary path using 'find' command."""
|
| 21 |
try:
|
| 22 |
result = subprocess.check_output(
|
| 23 |
['find', '/', '-name', binary_name, '-type', 'f', '-executable'],
|
|
|
|
| 61 |
def setup_driver():
|
| 62 |
"""Set up Selenium WebDriver with headless Chromium."""
|
| 63 |
try:
|
| 64 |
+
# Define possible binary paths for Ubuntu
|
| 65 |
+
possible_chromium_paths = [
|
| 66 |
+
'/usr/lib/chromium-browser/chromium-browser',
|
| 67 |
+
'/usr/bin/chromium-browser',
|
| 68 |
+
'/usr/bin/chromium',
|
| 69 |
+
'/usr/bin/chrome'
|
| 70 |
+
]
|
| 71 |
+
possible_chromedriver_paths = [
|
| 72 |
+
'/usr/lib/chromium-browser/chromedriver',
|
| 73 |
+
'/usr/bin/chromedriver',
|
| 74 |
+
'/home/user/.wdm/drivers/chromedriver/linux64/114.0.5735.90/chromedriver'
|
| 75 |
+
]
|
| 76 |
+
|
| 77 |
# Search for binaries
|
| 78 |
+
chromium_path = None
|
| 79 |
+
for path in possible_chromium_paths:
|
| 80 |
+
if os.path.exists(path):
|
| 81 |
+
chromium_path = path
|
| 82 |
+
break
|
| 83 |
+
if not chromium_path:
|
| 84 |
+
chromium_path = find_binary('chromium') or find_binary('chromium-browser') or find_binary('chrome')
|
| 85 |
+
|
| 86 |
+
chromedriver_path = None
|
| 87 |
+
for path in possible_chromedriver_paths:
|
| 88 |
+
if os.path.exists(path):
|
| 89 |
+
chromedriver_path = path
|
| 90 |
+
break
|
| 91 |
+
if not chromedriver_path:
|
| 92 |
+
chromedriver_path = find_binary('chromedriver')
|
| 93 |
|
| 94 |
# Log versions for debugging
|
| 95 |
check_versions(chromium_path, chromedriver_path)
|
| 96 |
|
| 97 |
if not chromium_path:
|
| 98 |
logging.warning("No Chromium binary found, attempting to proceed with webdriver-manager")
|
| 99 |
+
chromium_path = '/usr/lib/chromium-browser/chromium-browser' # Fallback path
|
| 100 |
|
| 101 |
# Configure Chrome options for headless Chromium in Docker
|
| 102 |
options = Options()
|
|
|
|
| 106 |
options.add_argument('--disable-gpu') # Disable GPU
|
| 107 |
options.binary_location = chromium_path
|
| 108 |
|
| 109 |
+
# Ensure ChromeDriver is executable
|
| 110 |
if chromedriver_path and os.path.exists(chromedriver_path):
|
| 111 |
+
subprocess.run(['chmod', '+x', chromedriver_path], check=True)
|
| 112 |
logging.info(f"Using system ChromeDriver at {chromedriver_path}")
|
| 113 |
service = Service(chromedriver_path)
|
| 114 |
else:
|
| 115 |
logging.info("Falling back to webdriver-manager for ChromeDriver")
|
| 116 |
+
chromedriver_path = ChromeDriverManager(chrome_type=ChromeType.CHROMIUM).install()
|
| 117 |
+
subprocess.run(['chmod', '+x', chromedriver_path], check=True)
|
| 118 |
+
service = Service(chromedriver_path)
|
| 119 |
|
| 120 |
# Initialize WebDriver
|
| 121 |
driver = webdriver.Chrome(service=service, options=options)
|