Spaces:
Sleeping
Sleeping
File size: 8,219 Bytes
3b7ec04 1caefe4 3b7ec04 256f37c f8136bb 1caefe4 5e4ff04 1caefe4 f8136bb 3b7ec04 256f37c 3b7ec04 0c01d1e 1caefe4 5e4ff04 f8136bb 0c01d1e 1caefe4 0c01d1e 1caefe4 f8136bb 0c01d1e 1caefe4 5e4ff04 1caefe4 5e4ff04 3b7ec04 1caefe4 3b7ec04 f8136bb 1caefe4 f8136bb 1caefe4 f8136bb 1caefe4 f8136bb 1caefe4 f8136bb 1caefe4 f8136bb 1caefe4 f8136bb 3b7ec04 256f37c f8136bb 1caefe4 f8136bb 3b7ec04 f8136bb 3b7ec04 b68547c 3b7ec04 b68547c 3b7ec04 b68547c 3b7ec04 1caefe4 3b7ec04 1caefe4 3b7ec04 1caefe4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 |
import gradio as gr
import tempfile
import os
import time
import traceback
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from subprocess import PIPE, STDOUT
import psutil
# Startup marker written to stdout as soon as the imports above have succeeded.
# Note that it is printed before the Gradio app object is actually built
# further down in this module.
print("Gradio app loaded.")
def _build_chrome_options():
    """Return the Chrome options used for headless capture inside a container."""
    options = Options()
    chrome_flags = (
        # Use new headless mode and basic options for Docker
        '--headless=new',
        '--no-sandbox',
        '--disable-dev-shm-usage',
        '--disable-gpu',
        '--disable-software-rasterizer',
        '--disable-extensions',
        '--disable-infobars',
        '--window-size=1920,1080',
        # NOTE(review): --disable-features is passed twice; Chrome may honor
        # only one occurrence -- confirm and merge into a single flag if so.
        '--disable-features=NetworkService,NetworkServiceInProcess',
        '--disable-features=site-per-process',
        '--single-process',
        '--memory-pressure-off',
        '--disable-crash-reporter',
        '--disable-breakpad',
        '--ignore-certificate-errors',
        '--disable-setuid-sandbox',
        '--disable-web-security',
        # NOTE(review): this looks like a `docker run` option rather than a
        # Chrome switch -- confirm whether it has any effect here.
        '--shm-size=2g',
    )
    for flag in chrome_flags:
        options.add_argument(flag)
    # Set page load strategy to 'none' to avoid waiting indefinitely
    options.page_load_strategy = "none"
    return options


def _terminate_chrome_children():
    """Best-effort: terminate child processes of this process named like Chrome."""
    try:
        current_process = psutil.Process(os.getpid())
        for child in current_process.children(recursive=True):
            if 'chrome' in child.name().lower():
                child.terminate()
    except Exception as psutil_error:
        # Cleanup must never mask the result of the capture itself.
        print(f"Error during process cleanup: {psutil_error}")


def capture_page(url: str, output_file: str = "screenshot.png"):
    """
    Captures a screenshot of the given webpage.

    :param url: The URL of the webpage to capture.
    :param output_file: The filename to save the screenshot.
    :return: True if the screenshot was written, False if Selenium reported
        that the file could not be saved.
    :raises Exception: if Chrome cannot be started or the capture fails; the
        underlying error is chained as ``__cause__``.
    """
    options = _build_chrome_options()
    # Set up Chrome service (ensure chromedriver is in your PATH).
    # The verbose driver log is forwarded to this process's stdout: handing
    # the driver a PIPE that is never read can block it once the pipe fills.
    service = Service(
        log_output=STDOUT,
        service_args=['--verbose']
    )

    print("Initializing Chrome...")
    try:
        driver = webdriver.Chrome(service=service, options=options)
    except Exception as e:
        print(f"Error initializing Chrome: {str(e)}")
        raise Exception(f"Failed to initialize Chrome: {str(e)}") from e
    print("Chrome initialized successfully")

    try:
        driver.implicitly_wait(5)
        # Set a 30-second timeout for page load
        driver.set_page_load_timeout(30)
        try:
            print(f"Navigating to URL: {url}")
            driver.get(url)
        except TimeoutException:
            print("Page load timed out. Proceeding with screenshot capture...")
        # Wait for the document ready state to be 'interactive' or 'complete'
        try:
            print("Waiting for document ready state...")
            WebDriverWait(driver, 30).until(
                lambda d: d.execute_script('return document.readyState') in ["interactive", "complete"]
            )
        except TimeoutException:
            print("Document did not reach ready state within timeout, proceeding anyway.")
        # Additional short delay to allow dynamic content to settle
        time.sleep(2)
        print("Taking screenshot...")
        # save_screenshot signals a failed file write through its return
        # value instead of raising, so the result has to be checked.
        if not driver.save_screenshot(output_file):
            print(f"Screenshot could not be written: {output_file}")
            return False
        print(f"Screenshot saved: {output_file}")
        return True
    except Exception as e:
        print(f"Error during page capture: {str(e)}")
        raise Exception(f"Failed to capture page: {str(e)}") from e
    finally:
        print("Closing Chrome...")
        # Wrap cleanup in a try/except to prevent errors if the session is already closed
        try:
            driver.quit()
        except Exception as cleanup_error:
            print(f"Error during driver.quit(): {cleanup_error}")
        # Clean up any lingering Chrome processes
        _terminate_chrome_children()
def capture_and_show(url: str):
    """Capture *url* into a uniquely named PNG and return that file's path.

    The file is placed in ``$TMPDIR`` (default ``/tmp``); if that directory
    is not writable, a per-user directory under ``/tmp`` is used instead.
    Every failure is logged to stdout and reported by returning ``None``.
    """
    try:
        target_dir = os.getenv('TMPDIR', '/tmp')
        try:
            os.makedirs(target_dir, mode=0o777, exist_ok=True)
            print(f"Using temp directory: {target_dir}")

            writable = os.access(target_dir, os.W_OK)
            if not writable:
                print(f"Warning: Temp directory {target_dir} is not writable")
                # Fall back to a directory keyed by the current user id.
                target_dir = os.path.join('/tmp', f'chrome_screenshots_{os.getuid()}')
                os.makedirs(target_dir, mode=0o777, exist_ok=True)
                print(f"Created user-specific temp directory: {target_dir}")

            # Random suffix so separate captures never share a file name.
            file_name = f"screenshot_{os.urandom(8).hex()}.png"
            shot_path = os.path.join(target_dir, file_name)
            print(f"Temp file path: {shot_path}")

            if not capture_page(url, shot_path):
                print("Screenshot capture returned False")
                return None

            if os.path.exists(shot_path):
                print("Screenshot captured successfully")
                return shot_path

            print("Screenshot file was not created")
            return None
        except OSError as os_error:
            print(f"OS Error: {os_error}")
            print(f"Stack trace: {traceback.format_exc()}")
            return None
    except Exception as error:
        print(f"Error in capture_and_show: {error}")
        print(f"Stack trace: {traceback.format_exc()}")
        return None
def create_gradio_app():
    """Create the main Gradio application with all components.

    The UI has a URL textbox with a "Capture" button, an image component
    showing the captured screenshot, and an error textbox that is hidden
    until a capture fails.

    :return: the configured ``gr.Blocks`` application (not yet launched).
    """
    with gr.Blocks() as app:
        gr.Markdown("# Webpage Screenshot Capture")
        with gr.Row():
            url_input = gr.Textbox(
                label="Website URL",
                placeholder="Enter website URL (e.g., https://www.example.com)",
                scale=4
            )
            capture_btn = gr.Button("Capture", scale=1)
        with gr.Row():
            output_image = gr.Image(
                label="Captured Screenshot",
                type="filepath"
            )
        # NOTE(review): the original indentation was lost; the error box is
        # assumed to sit below the image row rather than inside it -- confirm.
        error_output = gr.Textbox(
            label="Error Message",
            visible=False,
            interactive=False
        )

        def capture_with_error(url):
            """Validate *url*, capture it, and return (image path, error-box update)."""
            try:
                # Pasted URLs often carry stray whitespace; strip it so an
                # otherwise valid URL is not rejected by the prefix check.
                url = (url or "").strip()
                if not url:
                    return None, gr.update(visible=True, value="Please enter a URL")
                if not url.startswith(('http://', 'https://')):
                    return None, gr.update(visible=True, value="Please enter a valid URL starting with http:// or https://")
                result = capture_and_show(url)
                if result is None:
                    return None, gr.update(visible=True, value="Failed to capture screenshot. Please check the URL and try again.")
                # Success: show the image and hide any earlier error message.
                return result, gr.update(visible=False, value="")
            except Exception as e:
                return None, gr.update(visible=True, value=f"Error: {str(e)}")

        capture_btn.click(
            fn=capture_with_error,
            inputs=[url_input],
            outputs=[output_image, error_output]
        )
    return app
# Address and port that main() passes to app.launch().
server_name = "0.0.0.0"
server_port = 7860

# Build the Gradio UI once, at module load; main() launches this object.
app = create_gradio_app()
def main():
    """Launch the module-level Gradio app on the configured host and port."""
    print("Starting Gradio server...")
    launch_options = {
        "server_name": server_name,
        "server_port": server_port,
        "share": False,
        "auth": None,
        "ssl_verify": False,
        "show_error": True,
        "favicon_path": None,
    }
    app.launch(**launch_options)
# Start the server only when this file is executed as a script, so that
# importing the module does not launch a server as a side effect.
if __name__ == "__main__":
    main()
|