Spaces:
Sleeping
Sleeping
| import os | |
| import tempfile | |
| import socket | |
| import traceback | |
| from urllib.parse import urlparse | |
| from playwright.sync_api import sync_playwright | |
| import gradio as gr | |
# Chromium command-line switches handed to every Playwright launch below.
# NOTE(review): this combination looks tuned for a restricted container
# runtime -- confirm against the actual deployment target before changing it.
BROWSER_ARGS = [
    # sandboxing
    "--no-sandbox", "--disable-setuid-sandbox",
    # shared memory / GPU usage
    "--disable-dev-shm-usage", "--disable-gpu",
    # process model
    "--single-process", "--no-zygote",
]
# Navigation timeouts in milliseconds: generous for the first attempt,
# shorter for each fallback so a dead host does not stall the UI for minutes.
_NAV_TIMEOUT_MS = 60000
_FALLBACK_TIMEOUT_MS = 15000


def _extract_host(url):
    """Return the bare hostname of *url*, or None when it has none.

    Uses ``urlparse(...).hostname`` so that userinfo (``user:pw@``), the port
    and IPv6 brackets are stripped correctly. Scheme-less input such as
    ``example.com/path`` or ``example.com:8080`` is re-parsed as a network
    location. Raises ValueError for input ``urlparse`` rejects.
    """
    parsed = urlparse(url)
    if not parsed.netloc and "://" not in url:
        parsed = urlparse("//" + url)
    return parsed.hostname


def _swap_scheme(url):
    """Return *url* with http and https exchanged, or None if it is neither."""
    if url.startswith("https://"):
        return "http://" + url[len("https://"):]
    if url.startswith("http://"):
        return "https://" + url[len("http://"):]
    return None


def _capture(page, target, timeout_ms):
    """Navigate *page* to *target* and return ``(title, screenshot_path)``.

    Any navigation or screenshot error propagates to the caller. The temp
    file handle is closed before the screenshot is written, and the file is
    removed again if the screenshot fails, so the caller never receives a
    path to an empty image.
    """
    page.goto(target, wait_until="domcontentloaded", timeout=timeout_ms)
    page_title = page.title()
    # delete=False: the file must outlive this function so the UI can read it.
    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
        shot_path = tmp.name
    try:
        page.screenshot(path=shot_path, full_page=True)
    except Exception:
        try:
            os.unlink(shot_path)
        except OSError:
            pass  # best-effort cleanup; the screenshot error is what matters
        raise
    return page_title, shot_path


def open_and_screenshot(url="https://example.com"):
    """Open *url* in headless Chromium and collect network diagnostics.

    Steps, each recorded in the diagnostics text:
      1. proxy-related environment variables,
      2. hostname extraction and DNS resolution,
      3. an optional ``requests.get`` pre-check (skipped if requests is
         not importable),
      4. Playwright navigation + full-page screenshot; when that fails, a
         plain-http attempt against the resolved IP and then an http<->https
         swap on the original URL are tried, stopping at the first success.

    Args:
        url: Address to open. Surrounding whitespace is ignored.

    Returns:
        A ``(title, screenshot_path, diag_text)`` tuple. ``title`` is ``""``
        and ``screenshot_path`` is ``None`` when no attempt succeeded;
        ``diag_text`` holds one ``key: value`` entry per diagnostic.

    This function does not raise for network or browser failures; they are
    captured as traces in ``diag_text``.
    """
    url = (url or "").strip()
    diag = {}
    title = ""
    screenshot_path = None

    # quick env/proxy info (upper-case name wins, lower-case is the fallback)
    for var in ("HTTP_PROXY", "HTTPS_PROXY", "NO_PROXY"):
        diag[var] = os.environ.get(var) or os.environ.get(var.lower())

    # parse host for DNS test
    host = None
    try:
        host = _extract_host(url)
        diag['url_host'] = host
    except ValueError as e:
        diag['url_host_parse_error'] = str(e)

    # DNS resolution
    resolved_ip = None
    if host:
        try:
            resolved_ip = socket.gethostbyname(host)
            diag['resolved_ip'] = resolved_ip
        except (OSError, UnicodeError) as e:
            # socket.gaierror is an OSError; UnicodeError covers bad IDNA labels
            diag['resolved_ip_error'] = repr(e)

    # optional pre-check with requests
    try:
        import requests
    except ImportError:
        diag['requests_not_installed'] = True
    else:
        try:
            resp = requests.get(url, timeout=6)
            diag['requests_status_code'] = resp.status_code
        except Exception as e:
            # Diagnostic boundary: whatever went wrong is the information we want.
            diag['requests_error'] = repr(e)

    # Playwright diagnostics + navigation attempts
    try:
        with sync_playwright() as pw:
            # executable path (property, not callable)
            diag['playwright_chromium_executable'] = getattr(
                pw.chromium, "executable_path", None
            )
            browser = pw.chromium.launch(headless=True, args=BROWSER_ARGS)
            try:
                page = browser.new_context().new_page()
                page.set_default_navigation_timeout(_NAV_TIMEOUT_MS)

                try:
                    title, screenshot_path = _capture(page, url, _NAV_TIMEOUT_MS)
                    diag['playwright_result'] = "success"
                    diag['navigator_user_agent'] = page.evaluate("navigator.userAgent")
                except Exception:
                    diag['playwright_goto_trace'] = traceback.format_exc()

                if screenshot_path is None:
                    fallbacks = []
                    if resolved_ip:
                        # plain http to the resolved IP (bypasses DNS; some
                        # hosts block https by IP)
                        fallbacks.append(
                            ("playwright_fallback_ip", f"http://{resolved_ip}/")
                        )
                    swapped = _swap_scheme(url)
                    if swapped:
                        fallbacks.append(("playwright_fallback_proto", swapped))

                    for key, alt_url in fallbacks:
                        try:
                            title, screenshot_path = _capture(
                                page, alt_url, _FALLBACK_TIMEOUT_MS
                            )
                            diag[key] = f"success via {alt_url}"
                            break  # keep the first successful result
                        except Exception:
                            diag[key + "_trace"] = traceback.format_exc()
            finally:
                try:
                    browser.close()
                except Exception:
                    # Closing is best-effort, but record the failure.
                    diag['playwright_close_error'] = traceback.format_exc()
    except Exception:
        diag['playwright_launch_trace'] = traceback.format_exc()

    # flatten diag for return
    diag_text = "\n".join(f"{k}: {v}" for k, v in diag.items())
    return title, screenshot_path, diag_text
# Example Gradio UI (replace your existing Blocks UI's call)
# Components are created in display order: URL box, button, then the three
# outputs that receive (title, screenshot, diagnostics) from the handler.
with gr.Blocks(title="Chromium-launch Space (Playwright)") as demo:
    url_input = gr.Textbox(value="https://example.com", label="URL")
    run_btn = gr.Button("Open URL in Chromium")
    output_title = gr.Textbox(label="Page title")
    output_img = gr.Image(label="Screenshot")
    output_diag = gr.Textbox(label="Diagnostics", lines=12)
    run_btn.click(
        fn=open_and_screenshot,
        inputs=url_input,
        outputs=[output_title, output_img, output_diag],
    )

if __name__ == "__main__":
    # Listen on all interfaces; the port comes from $PORT, defaulting to 7860.
    listen_port = int(os.environ.get("PORT", 7860))
    demo.launch(server_name="0.0.0.0", server_port=listen_port)