from functools import lru_cache
import os

from bs4 import BeautifulSoup
from fake_useragent import UserAgent
from fastapi import FastAPI, Form
from fastapi.responses import JSONResponse, HTMLResponse
import requests

# Stealth engine
from stealth_browser import (
    launch_stealth_browser,
    stealth_goto,
    close_browser,
)

app = FastAPI(title="Ultra Powerful Scraper")

# --------------------------------
# Utils
# --------------------------------

# Substrings (already lower-cased) whose presence marks a response as a
# Cloudflare challenge/interstitial page.
# NOTE(review): "cloudflare" and "/cdn-cgi/" are broad and may also match
# ordinary pages that merely mention or are served through Cloudflare --
# confirm this level of sensitivity is intended.
_CLOUDFLARE_MARKERS = (
    "cf-browser-verification",
    "cloudflare",
    "attention required!",
    "checking your browser",
    "/cdn-cgi/",
)


@lru_cache(maxsize=1)
def _user_agent():
    """Return a shared ``UserAgent`` instance, created on first use."""
    return UserAgent()


def get_headers():
    """Build request headers with a randomly chosen User-Agent string.

    Returns:
        A dict of HTTP headers suitable for ``requests.get(headers=...)``.
    """
    return {
        "User-Agent": _user_agent().random,
        "Accept-Language": "en-US,en;q=0.9",
        "Accept": "text/html,application/xhtml+xml",
        "Connection": "keep-alive",
        "DNT": "1",
    }


def is_cloudflare_page(html: str) -> bool:
    """Return True if *html* contains any known Cloudflare marker.

    The comparison is case-insensitive.
    """
    html_lower = html.lower()
    return any(marker in html_lower for marker in _CLOUDFLARE_MARKERS)


def parse_html(html: str):
    """Extract the title, headings, paragraphs and links from *html*.

    Returns:
        A dict with keys ``title`` (str, empty when the document has no
        ``<title>``), ``headings`` (text of every h1/h2/h3), ``paragraphs``
        (text of every ``<p>``) and ``links`` (unique ``href`` values in
        document order).
    """
    soup = BeautifulSoup(html, "lxml")

    # ``Tag.string`` is None for an empty <title> or one containing nested
    # markup, so use get_text() rather than ``.string.strip()``.
    title_tag = soup.title
    title = title_tag.get_text(strip=True) if title_tag is not None else ""

    # dict.fromkeys de-duplicates while keeping first-seen order, so the
    # result is deterministic across runs.
    links = list(dict.fromkeys(a["href"] for a in soup.find_all("a", href=True)))

    return {
        "title": title,
        "headings": [
            h.get_text(strip=True) for h in soup.find_all(["h1", "h2", "h3"])
        ],
        "paragraphs": [p.get_text(strip=True) for p in soup.find_all("p")],
        "links": links,
    }


# --------------------------------
# Static scrape
# --------------------------------
def static_scrape(url: str):
    """Fetch *url* with ``requests`` and parse the returned HTML.

    Returns:
        The ``parse_html`` result merged with ``success``, ``engine`` and
        ``bypass`` metadata.

    Raises:
        requests.RequestException: on connection errors, timeouts, or a
            non-2xx status (via ``raise_for_status``).
        RuntimeError: if the body looks like a Cloudflare challenge page.
    """
    response = requests.get(url, headers=get_headers(), timeout=20)
    response.raise_for_status()

    if is_cloudflare_page(response.text):
        raise RuntimeError("Cloudflare detected")

    return {
        "success": True,
        "engine": "requests",
        "bypass": "not_needed",
        **parse_html(response.text),
    }


# --------------------------------
# Stealth scrape (Cloudflare-aware)
# --------------------------------
def stealth_scrape(url: str):
    """Load *url* in a headless stealth browser and parse the page.

    The value returned by ``stealth_goto`` is passed straight to
    ``parse_html``, so it is expected to be the page's HTML.
    NOTE(review): the result is not re-checked with ``is_cloudflare_page``;
    a challenge page that was not bypassed would still be reported as a
    success -- confirm whether that is intended.

    Returns:
        The ``parse_html`` result merged with ``success``, ``engine`` and
        ``bypass`` metadata.
    """
    p, browser, _context, page = launch_stealth_browser(headless=True)
    try:
        html = stealth_goto(page, url)
        return {
            "success": True,
            "engine": "playwright-stealth",
            "bypass": "attempted",
            **parse_html(html),
        }
    finally:
        # Always release the browser, even when navigation or parsing fails.
        close_browser(p, browser)


# --------------------------------
# API
# --------------------------------
@app.post("/scrape")
def scrape(url: str = Form(...)):
    """Scrape *url*, trying plain HTTP first and a stealth browser second.

    Args:
        url: Target address submitted as a form field; must begin with
            ``http://`` or ``https://``.

    Returns:
        A ``JSONResponse``: the scrape result plus a ``logs`` list on
        success, status 400 for a malformed URL, or status 500 with the
        accumulated ``logs`` when both engines fail.
    """
    # Check the full scheme prefix: a bare "http" test would also accept
    # inputs such as "httpfoo" that can never be fetched.
    # NOTE(review): the URL is caller-supplied and fetched server-side;
    # consider rejecting private/loopback hosts to limit SSRF exposure.
    if not url.startswith(("http://", "https://")):
        return JSONResponse(
            status_code=400,
            content={
                "success": False,
                "error": "Invalid URL. Must start with http or https.",
            },
        )

    logs = []

    # 1) Try the cheap static fetch first.
    logs.append("Trying static scraping (requests)")
    try:
        result = static_scrape(url)
    except Exception as exc:  # boundary: any failure triggers the fallback
        logs.append(f"Static failed: {exc}")
    else:
        logs.append("Static scrape successful")
        result["logs"] = logs
        return JSONResponse(result)

    # 2) Fall back to the stealth browser.
    logs.append("Switching to stealth browser (Cloudflare bypass)")
    try:
        result = stealth_scrape(url)
    except Exception as exc:  # boundary: reported to the client below
        logs.append(f"Stealth failed: {exc}")
    else:
        logs.append("Stealth scrape completed")
        result["logs"] = logs
        return JSONResponse(result)

    return JSONResponse(
        status_code=500,
        content={
            "success": False,
            "error": "All scraping methods failed",
            "logs": logs,
        },
    )


# --------------------------------
# Serve UI
# --------------------------------
@app.get("/", response_class=HTMLResponse)
def serve_ui():
    """Serve ``index.html`` from the current working directory.

    Returns:
        The file's contents as HTML, or a 404 HTML response when the file
        does not exist.
    """
    # Open directly instead of checking os.path.exists first: this avoids a
    # race between the check and the read.
    try:
        with open("index.html", "r", encoding="utf-8") as f:
            return f.read()
    except FileNotFoundError:
        return HTMLResponse(
            content="<h3>index.html not found</h3>",
            status_code=404,
        )