Spaces:
Paused
Paused
| import gradio as gr | |
| import undetected_chromedriver as uc | |
| from selenium.webdriver.common.by import By | |
| from selenium.webdriver.support.ui import WebDriverWait | |
| from selenium.webdriver.support import expected_conditions as EC | |
| from selenium.common.exceptions import WebDriverException, TimeoutException | |
| from PIL import Image | |
| from io import BytesIO | |
| import time | |
| import random | |
| import os | |
def create_stealth_driver(headless=False, mobile_view=False):
    """Build a Chrome WebDriver through undetected-chromedriver.

    Args:
        headless: When true, Chrome is started with ``--headless=new``.
        mobile_view: When true, a 375x812 iPhone-style profile is applied
            through the ``mobileEmulation`` option; otherwise the window is
            resized to 1920x1080 once the browser is up.

    Returns:
        The started driver, or ``None`` if anything raised while building
        it (the error is printed instead of being propagated).
    """
    try:
        chrome_opts = uc.ChromeOptions()

        # Baseline switches first, then the ones meant to cut load time.
        for switch in (
            "--no-sandbox",
            "--disable-dev-shm-usage",
            "--disable-gpu",
            "--disable-software-rasterizer",
            "--disable-extensions",
            "--disable-plugins",
            "--disable-images",  # intended to speed up page loads
        ):
            chrome_opts.add_argument(switch)

        if mobile_view:
            chrome_opts.add_experimental_option(
                "mobileEmulation",
                {
                    "deviceMetrics": {"width": 375, "height": 812, "pixelRatio": 3.0},
                    "userAgent": "Mozilla/5.0 (iPhone; CPU iPhone OS 14_0 like Mac OS X) AppleWebKit/605.1.15",
                },
            )

        if headless:
            chrome_opts.add_argument("--headless=new")

        # The three ``None`` arguments leave version detection, driver
        # download and browser lookup to undetected-chromedriver.
        browser = uc.Chrome(
            options=chrome_opts,
            version_main=None,
            driver_executable_path=None,
            browser_executable_path=None,
            use_subprocess=True,
            debug=False,
        )

        # Desktop captures use a fixed full-HD window; mobile emulation
        # already dictates its own viewport.
        if not mobile_view:
            browser.set_window_size(1920, 1080)
    except Exception as exc:
        print(f"Failed to create undetected Chrome driver: {exc}")
        return None
    return browser
def wait_for_cloudflare(driver, timeout=30):
    """Wait until the loaded page no longer looks like a Cloudflare challenge.

    First waits (up to ``timeout`` seconds) for ``document.readyState`` to
    become ``"complete"``, then polls the page source until none of the
    known challenge phrases is present.

    Args:
        driver: WebDriver whose current page is inspected.
        timeout: Maximum number of seconds for each of the two phases.

    Returns:
        ``True`` once the page source contains none of the challenge
        phrases; ``False`` if the page never finished loading or the
        phrases were still present when the time budget ran out.
    """
    # Phrases whose presence in the (lower-cased) page source is treated as
    # "challenge still showing".
    # NOTE(review): this is plain substring matching on the whole document,
    # so a page that merely mentions e.g. "cloudflare" will also match and
    # run into the timeout - consider narrowing these markers.
    challenge_markers = (
        'checking your browser',
        'cloudflare',
        'please wait',
        'ddos protection',
        'checking if the site connection is secure',
    )
    poll_interval = 2  # seconds between page-source checks

    try:
        WebDriverWait(driver, timeout).until(
            lambda d: d.execute_script("return document.readyState") == "complete"
        )
    except TimeoutException:
        print("⚠️ Page load timeout")
        return False

    # Monotonic clock: elapsed time must not jump if the wall clock changes.
    started = time.monotonic()
    while True:
        # Inspect the page before consulting the clock so that it is checked
        # at least once, even when ``timeout`` is zero or negative.
        page_source = driver.page_source.lower()
        if not any(marker in page_source for marker in challenge_markers):
            print("✅ Cloudflare challenge passed or not present")
            return True

        elapsed = time.monotonic() - started
        if elapsed >= timeout:
            break

        print(f"⏳ Waiting for Cloudflare challenge... ({int(elapsed)}s)")
        # Never sleep past the remaining time budget.
        time.sleep(min(poll_interval, timeout - elapsed))

    print("⚠️ Cloudflare challenge timeout")
    return False
def take_screenshot(url, wait_time=10, use_mobile=False, headless_mode=True, retry_count=2):
    """Capture a screenshot of ``url`` with an undetected Chrome session.

    A fresh driver is created for every attempt and always shut down again
    before the next attempt or before returning.

    Args:
        url: Address to capture. Surrounding whitespace is ignored and
            ``https://`` is prepended when no http(s) scheme is present.
        wait_time: Seconds to sleep after the protection check so the page
            can finish rendering.
        use_mobile: Forwarded to ``create_stealth_driver`` as ``mobile_view``.
        headless_mode: Forwarded to ``create_stealth_driver`` as ``headless``.
        retry_count: Number of retries after the first attempt, giving
            ``retry_count + 1`` attempts in total.

    Returns:
        A ``PIL.Image`` of the page, or ``None`` when the URL is empty or
        every attempt failed.
    """
    url = (url or "").strip()
    if not url:
        print("❌ No URL provided")
        return None
    # Case-insensitive so that e.g. "HTTPS://site" is not double-prefixed.
    if not url.lower().startswith(("http://", "https://")):
        url = "https://" + url

    for attempt in range(retry_count + 1):
        is_last_attempt = attempt >= retry_count
        driver = None
        try:
            print(f"📸 Attempt {attempt + 1} to screenshot: {url}")

            driver = create_stealth_driver(headless=headless_mode, mobile_view=use_mobile)
            if driver is None:
                continue

            # Random pause before navigating, to look less like automation.
            time.sleep(random.uniform(2, 4))
            print("🌐 Navigating to URL...")
            driver.get(url)

            if not wait_for_cloudflare(driver):
                if not is_last_attempt:
                    print("🔄 Retrying due to protection challenge...")
                    continue
                # Last attempt: report the failure but still capture
                # whatever the browser is showing.
                print("❌ Failed to bypass protection after all attempts")

            print(f"⏳ Waiting {wait_time}s for page to load...")
            time.sleep(wait_time)

            # Scroll down and back up to trigger lazy-loaded content;
            # failures here must not abort the capture.
            try:
                driver.execute_script("window.scrollTo(0, document.body.scrollHeight/3);")
                time.sleep(1)
                driver.execute_script("window.scrollTo(0, document.body.scrollHeight/2);")
                time.sleep(1)
                driver.execute_script("window.scrollTo(0, 0);")
                time.sleep(2)
            except Exception as e:
                print(f"Scroll behavior failed: {e}")

            print("📷 Capturing screenshot...")
            screenshot = driver.get_screenshot_as_png()

            # A PNG of under 1000 bytes is treated as a failed capture.
            if len(screenshot) < 1000:
                print("⚠️ Screenshot too small, likely error page")
                if not is_last_attempt:
                    continue
                return None

            print("✅ Screenshot captured successfully!")
            return Image.open(BytesIO(screenshot))

        except WebDriverException as e:
            print(f"❌ WebDriver error (attempt {attempt + 1}): {e}")
            if not is_last_attempt:
                # Back off a little before starting a new browser.
                time.sleep(random.uniform(3, 6))
        except Exception as e:
            print(f"❌ Unexpected error (attempt {attempt + 1}): {e}")
        finally:
            # Runs on every exit path (return, continue, exception) so no
            # browser process is left behind.
            if driver is not None:
                try:
                    driver.quit()
                except Exception as quit_error:
                    # Best effort: a failed shutdown must not mask the result.
                    print(f"Driver cleanup failed: {quit_error}")

    print("❌ All attempts failed")
    return None
| # Gradio interface with enhanced features | |
def screenshot_interface(url, wait_time, use_mobile_view, headless_mode):
    """Gradio callback: validate the inputs and run ``take_screenshot``.

    Args:
        url: Text entered by the user; ``None``, empty and whitespace-only
            values are rejected.
        wait_time: Seconds to wait for the page; falls back to 10 when the
            value is missing or not numeric.
        use_mobile_view: Whether to capture with mobile emulation.
        headless_mode: Whether to run the browser headless.

    Returns:
        A ``(image, status)`` tuple. ``image`` is ``None`` whenever no
        screenshot could be produced; ``status`` is a message for the user.
    """
    cleaned_url = (url or "").strip()
    if not cleaned_url:
        return None, "Please enter a valid URL"

    try:
        wait_seconds = int(wait_time) if wait_time else 10
    except (TypeError, ValueError):
        wait_seconds = 10

    # Anything raised below is turned into a status message so the UI
    # always receives a well-formed result.
    try:
        # Pass the stripped URL on, not the raw text that was validated.
        result = take_screenshot(cleaned_url, wait_seconds, use_mobile_view, headless_mode)
    except Exception as e:
        return None, f"❌ Error: {str(e)}"

    if result is not None:
        return result, "✅ Screenshot captured successfully with undetected Chrome!"
    return None, "❌ Failed to capture screenshot. The site may have very strong protection."
| # Create Gradio interface | |
# Four inputs (URL, wait time, mobile toggle, headless toggle) are passed
# positionally to ``screenshot_interface``; its (image, status) result fills
# the two outputs in the same order.
iface = gr.Interface(
    fn=screenshot_interface,
    inputs=[
        gr.Textbox(
            label="Website URL",
            placeholder="https://example.com or example.com",
            value="https://github.com",
        ),
        gr.Slider(
            minimum=5,
            maximum=30,
            value=10,
            step=1,
            label="Wait Time (seconds)",
        ),
        gr.Checkbox(
            label="Mobile View",
            value=False,
        ),
        gr.Checkbox(
            label="Headless Mode (Hidden Browser)",
            value=True,
        ),
    ],
    outputs=[
        gr.Image(type="pil", label="Screenshot"),
        gr.Textbox(label="Status", lines=2),
    ],
    title="🚀 Undetected Chrome Screenshot Tool",
    description="Advanced screenshot tool using undetected-chromedriver to bypass Cloudflare and other bot protections.",
    theme="default",
)
if __name__ == "__main__":
    # Start the Gradio app only when this file is executed as a script,
    # never on import.
    iface.launch(show_error=True, inbrowser=True, share=False)