Spaces:
Paused
Paused
| import json | |
| import re | |
| import os | |
| from urllib.parse import urlparse | |
| from CloudflareBypasser import CloudflareBypasser | |
| from DrissionPage import ChromiumPage, ChromiumOptions | |
| from fastapi import FastAPI, HTTPException, Response | |
| from pydantic import BaseModel | |
| from typing import Dict | |
| import argparse | |
| from pyvirtualdisplay import Display | |
| import uvicorn | |
| import atexit | |
| import time | |
# Docker mode is on only when the DOCKERMODE environment variable equals
# "true" (compared case-insensitively); anything else leaves it off.
DOCKER_MODE = os.environ.get("DOCKERMODE", "false").lower() == "true"

# Port passed to uvicorn; override it with the SERVER_PORT environment variable.
SERVER_PORT = int(os.environ.get("SERVER_PORT", 8000))

# Chromium command-line switches.
# Optional extras that are intentionally left out of the list:
#   "--remote-debugging-port=9222"  -> enables remote debugging
#   "-incognito"                    -> opens the browser in incognito mode
arguments = [
    "-no-first-run",
    "-force-color-profile=srgb",
    "-metrics-recording-only",
    "-password-store=basic",
    "-use-mock-keychain",
    "-export-tagged-pdf",
    "-no-default-browser-check",
    "-disable-background-mode",
    "-enable-features=NetworkService,NetworkServiceInProcess,LoadCryptoTokenExtension,PermuteTLSExtensions",
    "-disable-features=FlashDeprecationWarning,EnablePasswordsAccountStorage",
    "-deny-permission-prompts",
    "-disable-gpu",
    "-accept-lang=en-US",
]

# Location of the Chrome binary handed to ChromiumOptions.set_paths().
browser_path = "/usr/bin/google-chrome"
# FastAPI application instance; it is handed to uvicorn.run() when this file
# is executed as a script.
app = FastAPI()


# Pydantic model for the response
class CookieResponse(BaseModel):
    """Response payload pairing a set of cookies with a user-agent string."""

    # Mapping of cookie name to cookie value.
    cookies: Dict[str, str]
    # User-agent string of the browser session the cookies came from.
    user_agent: str
# Function to check if the URL is safe
def is_safe_url(url: str) -> bool:
    """Return False when *url* obviously targets a local or internal resource.

    A URL is rejected when any of the following holds:

    * it cannot be parsed at all;
    * its scheme is ``file``;
    * its host matches the literal blocklist (``localhost``, ``0.0.0.0``,
      ``::1`` and the dotted RFC 1918 patterns);
    * its host is an IP literal (IPv4, IPv6, or IPv4-mapped IPv6) that is
      loopback, private, link-local, or unspecified.

    Host names are NOT resolved, so a DNS name that points at an internal
    address is still reported as safe.

    Args:
        url: The URL supplied by the API caller.

    Returns:
        True if none of the rejection rules match, False otherwise.
    """
    # Function-scope import so this block does not depend on the file's
    # top-level import list.
    import ipaddress

    try:
        parsed_url = urlparse(url)
        hostname = parsed_url.hostname
    except ValueError:
        # urlparse raises on malformed input such as an unclosed "[";
        # something we cannot parse is not something we should open.
        return False

    if parsed_url.scheme == "file":
        return False

    # "localhost." (trailing dot) names the same host as "localhost".
    hostname = (hostname or "").rstrip(".")
    if not hostname:
        return True

    ip_pattern = re.compile(
        r"^(127\.0\.0\.1|localhost|0\.0\.0\.0|::1|10\.\d+\.\d+\.\d+|172\.1[6-9]\.\d+\.\d+|172\.2[0-9]\.\d+\.\d+|172\.3[0-1]\.\d+\.\d+|192\.168\.\d+\.\d+)$"
    )
    if ip_pattern.match(hostname):
        return False

    try:
        address = ipaddress.ip_address(hostname)
    except ValueError:
        # Not an IP literal, i.e. an ordinary DNS name.
        return True

    # Judge "::ffff:a.b.c.d" by the IPv4 address it wraps.
    mapped = getattr(address, "ipv4_mapped", None)
    if mapped is not None:
        address = mapped

    return not (
        address.is_loopback
        or address.is_private
        or address.is_link_local
        or address.is_unspecified
    )
# Function to verify if the page has loaded properly
def verify_page_loaded(driver: ChromiumPage) -> bool:
    """Report whether the page open in *driver* appears to have real content.

    The check looks up the ``<body>`` element (with ``timeout=10``) and treats
    the page as loaded when that element's HTML is longer than 100 characters.

    Args:
        driver: The browser page to inspect.

    Returns:
        True when the body HTML exceeds 100 characters. False when it is
        shorter, or when the lookup raises an ordinary exception.
    """
    try:
        # Wait for the body element to be present.
        body = driver.ele("tag:body", timeout=10)
        # A near-empty body is treated as "not loaded".
        return len(body.html) > 100
    except Exception:
        # Best-effort probe: any ordinary failure means "not loaded".
        # KeyboardInterrupt / SystemExit are deliberately allowed to propagate.
        return False
# Function to bypass Cloudflare protection
def bypass_cloudflare(url: str, retries: int, log: bool, proxy: str = None) -> ChromiumPage:
    """Open *url* in a fresh browser and run the Cloudflare bypasser on it.

    Up to three attempts are made. Each attempt starts a new browser, loads
    the page, waits 5 seconds, checks the page with ``verify_page_loaded``
    and then runs ``CloudflareBypasser.bypass()``. A failed attempt quits its
    browser exactly once and, unless it was the last attempt, sleeps 3 seconds
    before the next one.

    Args:
        url: Page to open.
        retries: Passed through to ``CloudflareBypasser``.
        log: Passed through to ``CloudflareBypasser``.
        proxy: Optional proxy handed to ``ChromiumOptions.set_proxy``.

    Returns:
        The live ``ChromiumPage``. The caller is responsible for calling
        ``quit()`` on it.

    Raises:
        Exception: Whatever made the final attempt fail, including
            "Failed to load page properly after multiple attempts" when the
            page never passed the load check.
    """
    max_load_retries = 3
    for load_attempt in range(max_load_retries):
        is_last_attempt = load_attempt == max_load_retries - 1

        options = ChromiumOptions().auto_port()
        if DOCKER_MODE:
            # NOTE(review): this pins the debugging port to 9222 even though
            # auto_port() was requested above -- confirm that is intended.
            options.set_argument("--auto-open-devtools-for-tabs", "true")
            options.set_argument("--remote-debugging-port=9222")
            options.set_argument("--no-sandbox")  # Necessary for Docker
            options.set_argument("--disable-gpu")  # Optional, helps in some cases
        options.set_paths(browser_path=browser_path).headless(False)
        if proxy:
            options.set_proxy(proxy)

        driver = ChromiumPage(addr_or_opts=options)
        try:
            driver.get(url)
            # Wait for initial page load
            time.sleep(5)
            if not verify_page_loaded(driver):
                # Handled by the except clause below so that the cleanup and
                # retry logic lives in a single place.
                raise Exception("Failed to load page properly after multiple attempts")
            cf_bypasser = CloudflareBypasser(driver, retries, log)
            cf_bypasser.bypass()
            return driver
        except Exception:
            # Quit once per failed attempt (a second quit() on an already
            # closed browser could mask the real error).
            driver.quit()
            if is_last_attempt:
                raise
            time.sleep(3)
# Endpoint to get cookies
# NOTE(review): no route decorator is visible on this handler in this view of
# the file -- confirm it is registered on `app` somewhere.
async def get_cookies(url: str, retries: int = 5, proxy: str = None):
    """Bypass Cloudflare for *url* and return the resulting cookies.

    Args:
        url: Target page; rejected with HTTP 400 when ``is_safe_url`` fails.
        retries: Passed through to ``bypass_cloudflare``.
        proxy: Optional proxy passed through to ``bypass_cloudflare``.

    Returns:
        A ``CookieResponse`` holding the cookie name/value mapping and the
        browser's user agent.

    Raises:
        HTTPException: 400 for an unsafe URL, 500 (with the error text as
            detail) for any failure while driving the browser.
    """
    if not is_safe_url(url):
        raise HTTPException(status_code=400, detail="Invalid URL")

    # `log` is only assigned by the script entry point; fall back to True
    # (the entry point's own default) when the module is merely imported.
    log_enabled = globals().get("log", True)

    try:
        driver = bypass_cloudflare(url, retries, log_enabled, proxy)
        try:
            cookies = {cookie.get("name", ""): cookie.get("value", " ") for cookie in driver.cookies()}
            user_agent = driver.user_agent
        finally:
            # Always close the browser, even if reading from it failed.
            driver.quit()
        return CookieResponse(cookies=cookies, user_agent=user_agent)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
# Endpoint to get HTML content and cookies
# NOTE(review): no route decorator is visible on this handler in this view of
# the file -- confirm it is registered on `app` somewhere.
async def get_html(url: str, retries: int = 5, proxy: str = None):
    """Bypass Cloudflare for *url* and return the page HTML.

    Args:
        url: Target page; rejected with HTTP 400 when ``is_safe_url`` fails.
        retries: Passed through to ``bypass_cloudflare``.
        proxy: Optional proxy passed through to ``bypass_cloudflare``.

    Returns:
        A ``text/html`` ``Response`` whose body is the page HTML. The cookies
        (JSON-encoded) and the browser's user agent are attached as the
        ``cookies`` and ``user_agent`` response headers.

    Raises:
        HTTPException: 400 for an unsafe URL, 500 (with the error text as
            detail) for any failure while driving the browser.
    """
    if not is_safe_url(url):
        raise HTTPException(status_code=400, detail="Invalid URL")

    # `log` is only assigned by the script entry point; fall back to True
    # (the entry point's own default) when the module is merely imported.
    log_enabled = globals().get("log", True)

    try:
        driver = bypass_cloudflare(url, retries, log_enabled, proxy)
        try:
            html = driver.html
            cookies_json = {cookie.get("name", ""): cookie.get("value", " ") for cookie in driver.cookies()}
            user_agent = driver.user_agent
        finally:
            # Always close the browser, even if reading from it failed.
            driver.quit()
        response = Response(content=html, media_type="text/html")
        response.headers["cookies"] = json.dumps(cookies_json)
        response.headers["user_agent"] = user_agent
        return response
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
# Main entry point
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Cloudflare bypass api")
    parser.add_argument("--nolog", action="store_true", help="Disable logging")
    parser.add_argument("--headless", action="store_true", help="Run in headless mode")
    args = parser.parse_args()

    # A pyvirtualdisplay Display is started for --headless runs and whenever
    # Docker mode is enabled; otherwise no display object is created.
    needs_display = DOCKER_MODE or args.headless
    display = Display(visible=0, size=(1920, 1080)) if needs_display else None
    if display is not None:
        display.start()

    def cleanup_display():
        """Stop the display at interpreter exit, if one was started."""
        if display:
            display.stop()

    atexit.register(cleanup_display)

    # Module-level flag read by the endpoint handlers; --nolog turns it off.
    log = not args.nolog

    uvicorn.run(app, host="0.0.0.0", port=SERVER_PORT)