Spaces:

minaewrw
/

mina-api

Paused

App Files Files Community

Mina committed on Jan 3

Commit

25ae7fe

0 Parent(s):

Fresh deploy without large files

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.gitignore +17 -0
Dockerfile +35 -0
Dockerfile.hf +35 -0
Procfile +1 -0
README.hf.md +33 -0
README.md +30 -0
database.py +48 -0
deploy/.dockerignore +30 -0
deploy/Dockerfile +50 -0
deploy/cloudflare-worker.js +77 -0
deploy/render.yaml +18 -0
downloader.py +145 -0
flaresolverr/bottle_plugins/__init__.py +0 -0
flaresolverr/bottle_plugins/error_plugin.py +22 -0
flaresolverr/bottle_plugins/logger_plugin.py +23 -0
flaresolverr/bottle_plugins/prometheus_plugin.py +66 -0
flaresolverr/build_package.py +126 -0
flaresolverr/dtos.py +94 -0
flaresolverr/flaresolverr.py +155 -0
flaresolverr/flaresolverr_service.py +519 -0
flaresolverr/metrics.py +32 -0
flaresolverr/sessions.py +84 -0
flaresolverr/tests.py +655 -0
flaresolverr/tests_sites.py +102 -0
flaresolverr/undetected_chromedriver/__init__.py +910 -0
flaresolverr/undetected_chromedriver/cdp.py +112 -0
flaresolverr/undetected_chromedriver/devtool.py +193 -0
flaresolverr/undetected_chromedriver/dprocess.py +77 -0
flaresolverr/undetected_chromedriver/options.py +85 -0
flaresolverr/undetected_chromedriver/patcher.py +473 -0
flaresolverr/undetected_chromedriver/reactor.py +99 -0
flaresolverr/undetected_chromedriver/webelement.py +86 -0
flaresolverr/utils.py +376 -0
keep_alive.py +47 -0
main.py +352 -0
package.json +12 -0
requirements.txt +14 -0
scraper/engine.py +996 -0
scraper/proxy_fetcher.py +66 -0
start.sh +31 -0
start_render.sh +22 -0
tools/analyze_structure.py +36 -0
tools/check_mirrors.py +34 -0
tools/debug_fs.py +51 -0
tools/debug_mirrors.py +35 -0
tools/debug_scraper.py +27 -0
tools/dump_html.py +25 -0
tools/dump_html_v2.py +25 -0
tools/extra/diagnose.py +27 -0
tools/extra/expose_to_internet.bat +18 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,17 @@

+venv/
+__pycache__/
+archive/
+*.db
+*.log
+.env
+.vscode/
+.idea/
+bin/
+cache/
+logs/
+*.exe
+*.img
+dist/
+node_modules/
+.choreo/
+TUNNEL_TOKEN.txt

Dockerfile ADDED Viewed

	@@ -0,0 +1,35 @@

+FROM python:3.10-slim
+# Hugging Face Optimized - Lightweight & Stable
+ENV PYTHONUNBUFFERED=1
+ENV HF_SPACE=1
+# Install minimal system dependencies
+RUN apt-get update && apt-get install -y \
+    curl \
+    ffmpeg \
+    && rm -rf /var/lib/apt/lists/*
+WORKDIR /app
+# Copy requirements and install
+COPY requirements.txt .
+RUN pip install --no-cache-dir --upgrade -r requirements.txt
+# Create a non-privileged user (Required by Hugging Face)
+RUN useradd -m -u 1000 user
+RUN chown -R user:user /app
+USER user
+ENV HOME=/home/user \
+    PATH=/home/user/.local/bin:$PATH
+# Copy application code
+COPY --chown=user:user . .
+# Hugging Face uses port 7860
+EXPOSE 7860
+ENV PORT=7860
+# Start the application with optimized settings for limited RAM
+# We use 1 worker to keep memory usage low on the free tier
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1", "--timeout-keep-alive", "60"]

Dockerfile.hf ADDED Viewed

	@@ -0,0 +1,35 @@

+FROM python:3.10-slim
+# Hugging Face optimized - Lightweight without Chrome
+ENV PYTHONUNBUFFERED=1
+ENV SPACE_ID=huggingface
+ENV HF_SPACE=1
+# Install minimal dependencies
+RUN apt-get update && apt-get install -y \
+    curl \
+    git \
+    && rm -rf /var/lib/apt/lists/*
+WORKDIR /app
+# Copy and install Python dependencies
+COPY requirements.txt .
+RUN pip install --no-cache-dir --upgrade -r requirements.txt
+# Create user for Hugging Face
+RUN useradd -m -u 1000 user
+RUN chown -R user:user /app
+USER user
+ENV HOME=/home/user \
+    PATH=/home/user/.local/bin:$PATH
+# Copy application
+COPY --chown=user:user . .
+# Hugging Face uses port 7860
+EXPOSE 7860
+ENV PORT=7860
+# Start without FlareSolverr (too heavy for HF)
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1"]

Procfile ADDED Viewed

	@@ -0,0 +1 @@


1	+ web: uvicorn main:app --host 0.0.0.0 --port $PORT --log-level info

README.hf.md ADDED Viewed

	@@ -0,0 +1,33 @@

+---
+title: MEIH Movies API
+emoji: 🎬
+colorFrom: red
+colorTo: gray
+sdk: docker
+app_file: main.py
+pinned: false
+license: mit
+---
+# MEIH Movies API - Hugging Face Edition
+High-performance movie streaming API optimized for Hugging Face Spaces.
+## Features
+- Fast content scraping with curl-cffi
+- Intelligent caching system
+- Rate limiting for stability
+- Proxy rotation support
+## API Endpoints
+- `GET /latest` - Latest movies and series
+- `GET /category/{cat_id}` - Browse by category
+- `GET /search?q={query}` - Search content
+- `GET /details/{id}` - Get streaming links
+- `GET /health` - Health check
+## Usage
+Visit the API at: `https://YOUR-SPACE-NAME.hf.space/`

README.md ADDED Viewed

	@@ -0,0 +1,30 @@

+---
+title: Meih Movies API
+emoji: 🎬
+colorFrom: red
+colorTo: gray
+sdk: docker
+pinned: false
+---
+# MEIH Movies API - Hugging Face Edition
+High-performance movie streaming API optimized for Hugging Face Spaces.
+## Features
+- **Lightweight**: Optimized for 16GB RAM environments.
+- **Fast**: Powered by `curl-cffi` for high-speed scraping.
+- **Stable**: Automatic proxy rotation and intelligent caching.
+- **Universal**: Serves both API and Frontend (if built).
+## API Endpoints
+- `GET /latest` - Latest movies and series.
+- `GET /search?q={query}` - Search content.
+- `GET /details/{id}` - Get streaming links.
+- `GET /health` - System status.
+## Deployment Note
+This project is configured to run on port **7860**. Ensure your Space is set to **Docker** SDK.

database.py ADDED Viewed

	@@ -0,0 +1,48 @@

+import aiosqlite
+import logging
+DB_NAME = "netflix_clone.db"
+async def init_db():
+    async with aiosqlite.connect(DB_NAME) as db:
+        # Movies Table
+        await db.execute("""
+            CREATE TABLE IF NOT EXISTS movies (
+                id TEXT PRIMARY KEY,
+                title TEXT,
+                poster TEXT,
+                year TEXT,
+                rating TEXT,
+                description TEXT,
+                category TEXT
+            )
+        """)
+        # Series Table
+        await db.execute("""
+            CREATE TABLE IF NOT EXISTS series (
+                id TEXT PRIMARY KEY,
+                title TEXT,
+                poster TEXT,
+                year TEXT,
+                rating TEXT,
+                description TEXT,
+                category TEXT
+            )
+        """)
+        # Episodes Table
+        await db.execute("""
+            CREATE TABLE IF NOT EXISTS episodes (
+                id INTEGER PRIMARY KEY AUTOINCREMENT,
+                series_id TEXT,
+                episode_number INTEGER,
+                title TEXT,
+                watch_link TEXT,
+                FOREIGN KEY(series_id) REFERENCES series(id)
+            )
+        """)
+        await db.commit()
+async def get_db_connection():
+    db = await aiosqlite.connect(DB_NAME)
+    db.row_factory = aiosqlite.Row
+    return db

deploy/.dockerignore ADDED Viewed

	@@ -0,0 +1,30 @@

+# Python ignore
+__pycache__/
+*.py[cod]
+*$py.class
+venv/
+.env
+netflix_clone.db
+archive/
+tools/
+# Node ignore
+node_modules/
+dist/
+build/
+.next/
+.vite/
+# Git ignore
+.git/
+.gitignore
+# OS ignore
+.DS_Store
+Thumbs.db
+# Project ignore
+setup_and_run.bat
+*.md
+.gemini/
+.agent/

deploy/Dockerfile ADDED Viewed

	@@ -0,0 +1,50 @@

+# ==========================================
+# Nitro Backend-Only Dockerfile for Hugging Face
+# ==========================================
+FROM python:3.11-slim
+# Install system dependencies for Scraper (Chrome) and FlareSolverr
+ENV DEBIAN_FRONTEND=noninteractive
+RUN apt-get update && apt-get install -y \
+    ffmpeg \
+    curl \
+    git \
+    wget \
+    gnupg \
+    xvfb \
+    xauth \
+    dos2unix \
+    libnss3 \
+    libatk-bridge2.0-0 \
+    libgtk-3-0 \
+    && wget -q -O - https://dl-ssl.google.com/linux/linux_signing_key.pub | gpg --dearmor -o /usr/share/keyrings/google-chrome.gpg \
+    && echo "deb [arch=amd64 signed-by=/usr/share/keyrings/google-chrome.gpg] http://dl.google.com/linux/chrome/deb/ stable main" > /etc/apt/sources.list.d/google-chrome.list \
+    && apt-get update && apt-get install -y google-chrome-stable \
+    && rm -rf /var/lib/apt/lists/*
+WORKDIR /app
+# Install Backend Dependencies
+COPY backend/requirements.txt ./
+RUN pip install --no-cache-dir --upgrade pip && \
+    pip install --no-cache-dir -r requirements.txt
+# Copy Backend Application
+COPY backend/ ./
+# Fix line endings and permissions
+RUN dos2unix start.sh && chmod +x start.sh
+# Create local user for Hugging Face Spaces (UID 1000)
+RUN useradd -m -u 1000 user
+RUN chown -R user:user /app
+USER user
+ENV HOME=/home/user \
+    PATH=/home/user/.local/bin:$PATH \
+    PYTHONPATH=/app
+# Expose the mandatory Hugging Face Space port
+EXPOSE 7860
+# Kickstart the engine
+CMD ["/bin/bash", "./start.sh"]

deploy/cloudflare-worker.js ADDED Viewed

	@@ -0,0 +1,77 @@

+/**
+ * Cloudflare Worker - Proxy Bypass for Larooza Scraper
+ * Deploy this to Cloudflare Workers (100% FREE)
+ *
+ * This worker acts as a middle-man to bypass IP bans
+ */
+addEventListener('fetch', event => {
+  event.respondWith(handleRequest(event.request))
+})
+async function handleRequest(request) {
+  // Enable CORS
+  const corsHeaders = {
+    'Access-Control-Allow-Origin': '*',
+    'Access-Control-Allow-Methods': 'GET, POST, OPTIONS',
+    'Access-Control-Allow-Headers': 'Content-Type',
+  }
+  // Handle CORS preflight
+  if (request.method === 'OPTIONS') {
+    return new Response(null, { headers: corsHeaders })
+  }
+  // Get target URL from query parameter
+  const url = new URL(request.url)
+  const targetUrl = url.searchParams.get('url')
+  if (!targetUrl) {
+    return new Response(JSON.stringify({ error: 'Missing url parameter' }), {
+      status: 400,
+      headers: { ...corsHeaders, 'Content-Type': 'application/json' }
+    })
+  }
+  try {
+    // Fetch the target URL with realistic headers
+    const response = await fetch(targetUrl, {
+      headers: {
+        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
+        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
+        'Accept-Language': 'ar,en-US;q=0.9,en;q=0.8',
+        'Referer': 'https://www.google.com/',
+        'DNT': '1',
+        'Connection': 'keep-alive',
+        'Upgrade-Insecure-Requests': '1',
+      },
+      cf: {
+        // Cloudflare-specific options
+        cacheTtl: 300, // Cache for 5 minutes
+        cacheEverything: true,
+      }
+    })
+    // Get the HTML content
+    const html = await response.text()
+    // Return with CORS headers
+    return new Response(html, {
+      status: response.status,
+      headers: {
+        ...corsHeaders,
+        'Content-Type': 'text/html; charset=utf-8',
+        'Cache-Control': 'public, max-age=300',
+      }
+    })
+  } catch (error) {
+    return new Response(JSON.stringify({
+      error: 'Failed to fetch target URL',
+      message: error.message
+    }), {
+      status: 500,
+      headers: { ...corsHeaders, 'Content-Type': 'application/json' }
+    })
+  }
+}

deploy/render.yaml ADDED Viewed

	@@ -0,0 +1,18 @@

+# Render.com Deployment Configuration
+# https://render.com
+services:
+  - type: web
+    name: meih-movies-api
+    env: docker
+    dockerfilePath: ./Dockerfile
+    dockerContext: ./backend
+    plan: free
+    region: oregon
+    envVars:
+      - key: PYTHON_VERSION
+        value: 3.11
+      - key: PORT
+        value: 7860
+    healthCheckPath: /health
+    autoDeploy: true

downloader.py ADDED Viewed

	@@ -0,0 +1,145 @@

+import yt_dlp
+import logging
+import asyncio
+logger = logging.getLogger(__name__)
+class VideoDownloader:
+    def __init__(self):
+        self.ydl_opts = {
+            'quiet': True,
+            'no_warnings': True,
+            'format': 'best',
+            'user_agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
+            'geo_bypass': True,
+            'no_playlist': True,
+            'nocheckcertificate': True,
+        }
+    async def get_info(self, url: str):
+        # 1. Handle Local Watch/Details Links or Direct Larooza Links
+        is_larooza = any(x in url for x in ['larozavideo', 'larooza', 'laroza'])
+        if "/watch/" in url or "/details/" in url or is_larooza:
+            try:
+                from scraper.engine import scraper
+                import base64
+                target_url = url
+                if "/watch/" in url or "/details/" in url:
+                    id_part = url.split("/")[-1].split("?")[0]
+                    if not id_part.startswith("http"):
+                        target_url = base64.urlsafe_b64decode(id_part).decode()
+                # If it's a Larooza link (direct or decoded), use scraper
+                if any(x in target_url for x in ['larozavideo', 'larooza', 'laroza']):
+                    logger.info(f"Routing Larooza link to scraper: {target_url}")
+                    # Normalize: downloader works better with the video.php page
+                    target_url = target_url.replace('play.php', 'video.php').replace('download.php', 'video.php')
+                    safe_id = base64.urlsafe_b64encode(target_url.encode()).decode()
+                    data = await scraper.fetch_details(safe_id)
+                    if data and data.get('download_links'):
+                        formats = []
+                        for dl in data['download_links']:
+                            formats.append({
+                                'ext': 'mp4',
+                                'resolution': dl['quality'],
+                                'url': dl['url'],
+                                'type': 'video'
+                            })
+                        return {
+                            'title': data.get('title'),
+                            'thumbnail': data.get('poster'),
+                            'duration': 0,
+                            'uploader': 'Larooza',
+                            'source': 'Larooza',
+                            'formats': formats
+                        }
+                    elif data:
+                         return {"error": "لم يتم العثور على روابط تحميل لهذا الفيديو (ربما محمي أو غير متاح حالياً)."}
+            except Exception as e:
+                logger.error(f"Larooza-specific extraction failed: {e}")
+        # 2. Universal yt-dlp Path (YouTube, TikTok, etc.)
+        try:
+            loop = asyncio.get_event_loop()
+            # Use a more robust extraction with a timeout
+            try:
+                info = await asyncio.wait_for(
+                    loop.run_in_executor(None, lambda: self._extract(url)),
+                    timeout=30.0
+                )
+            except asyncio.TimeoutError:
+                logger.error(f"Timeout extracting info for {url}")
+                return {"error": "استغرق استخراج البيانات وقتاً طويلاً. حاول مرة أخرى."}
+            if not info:
+                return {"error": "فشل في استخراج بيانات الفيديو. تأكد من الرابط."}
+            # Live stream check
+            if info.get('is_live') or info.get('live_status') == 'is_upcoming':
+                return {"error": "هذا الفيديو لم يبدأ عرضه بعد أو هو بث مباشر حالياً."}
+            formats = []
+            seen_resolutions = set()
+            # Extract usable formats
+            raw_formats = info.get('formats', [])
+            if not raw_formats and info.get('url'):
+                raw_formats = [info] # For direct links
+            for f in raw_formats:
+                if not f: continue
+                # Filter out formats without a direct URL or those that are just manifests
+                f_url = f.get('url')
+                if not f_url or '.m3u8' in f_url or '.mpd' in f_url:
+                    continue
+                ext = f.get('ext', 'mp4')
+                res = f.get('resolution') or f.get('format_note') or f.get('height') or 'Unknown'
+                # Clean resolution label
+                if isinstance(res, int): res = f"{res}p"
+                # Avoid duplicates and prioritize video formats
+                res_key = f"{res}_{f.get('vcodec') != 'none'}"
+                if res_key in seen_resolutions: continue
+                seen_resolutions.add(res_key)
+                formats.append({
+                    'id': f.get('format_id', 'unknown'),
+                    'ext': ext,
+                    'resolution': res,
+                    'filesize': f.get('filesize') or f.get('filesize_approx') or 0,
+                    'url': f_url,
+                    'type': 'video' if f.get('vcodec') != 'none' else 'audio'
+                })
+            if not formats:
+                return {"error": "لم يتم العثور على روابط تحميل مباشرة مدعومة لهذا الفيديو."}
+            return {
+                'title': info.get('title', 'Video'),
+                'thumbnail': info.get('thumbnail', ''),
+                'duration': info.get('duration', 0),
+                'uploader': info.get('uploader', 'Unknown'),
+                'source': info.get('extractor_key', 'Unknown'),
+                'formats': formats[::-1]
+            }
+        except Exception as e:
+            logger.error(f"Universal Downloader error for {url}: {e}")
+            return {"error": f"حدث خطأ غير متوقع: {str(e)}"}
+    def _extract(self, url):
+        opts = self.ydl_opts.copy()
+        # Add extra robustness for TikTok and newer sites
+        opts.update({
+            'nocheckcertificate': True,
+            'ignoreerrors': True,
+            'socket_timeout': 15,
+        })
+        with yt_dlp.YoutubeDL(opts) as ydl:
+            return ydl.extract_info(url, download=False)
+downloader = VideoDownloader()

flaresolverr/bottle_plugins/__init__.py ADDED Viewed

File without changes

flaresolverr/bottle_plugins/error_plugin.py ADDED Viewed

	@@ -0,0 +1,22 @@

+from bottle import response
+import logging
+def error_plugin(callback):
+    """
+    Bottle plugin to handle exceptions
+    https://stackoverflow.com/a/32764250
+    """
+    def wrapper(*args, **kwargs):
+        try:
+            actual_response = callback(*args, **kwargs)
+        except Exception as e:
+            logging.error(str(e))
+            actual_response = {
+                "error": str(e)
+            }
+            response.status = 500
+        return actual_response
+    return wrapper

flaresolverr/bottle_plugins/logger_plugin.py ADDED Viewed

	@@ -0,0 +1,23 @@

+from bottle import request, response
+import logging
+def logger_plugin(callback):
+    """
+    Bottle plugin to use logging module
+    https://bottlepy.org/docs/dev/plugindev.html
+    Wrap a Bottle request so that a log line is emitted after it's handled.
+    (This decorator can be extended to take the desired logger as a param.)
+    """
+    def wrapper(*args, **kwargs):
+        actual_response = callback(*args, **kwargs)
+        if not request.url.endswith("/health"):
+            logging.info('%s %s %s %s' % (request.remote_addr,
+                                          request.method,
+                                          request.url,
+                                          response.status))
+        return actual_response
+    return wrapper

flaresolverr/bottle_plugins/prometheus_plugin.py ADDED Viewed

	@@ -0,0 +1,66 @@

+import logging
+import os
+import urllib.parse
+from bottle import request
+from dtos import V1RequestBase, V1ResponseBase
+from metrics import start_metrics_http_server, REQUEST_COUNTER, REQUEST_DURATION
+PROMETHEUS_ENABLED = os.environ.get('PROMETHEUS_ENABLED', 'false').lower() == 'true'
+PROMETHEUS_PORT = int(os.environ.get('PROMETHEUS_PORT', 8192))
+def setup():
+    if PROMETHEUS_ENABLED:
+        start_metrics_http_server(PROMETHEUS_PORT)
+def prometheus_plugin(callback):
+    """
+    Bottle plugin to expose Prometheus metrics
+    https://bottlepy.org/docs/dev/plugindev.html
+    """
+    def wrapper(*args, **kwargs):
+        actual_response = callback(*args, **kwargs)
+        if PROMETHEUS_ENABLED:
+            try:
+                export_metrics(actual_response)
+            except Exception as e:
+                logging.warning("Error exporting metrics: " + str(e))
+        return actual_response
+    def export_metrics(actual_response):
+        res = V1ResponseBase(actual_response)
+        if res.startTimestamp is None or res.endTimestamp is None:
+            # skip management and healthcheck endpoints
+            return
+        domain = "unknown"
+        if res.solution and res.solution.url:
+            domain = parse_domain_url(res.solution.url)
+        else:
+            # timeout error
+            req = V1RequestBase(request.json)
+            if req.url:
+                domain = parse_domain_url(req.url)
+        run_time = (res.endTimestamp - res.startTimestamp) / 1000
+        REQUEST_DURATION.labels(domain=domain).observe(run_time)
+        result = "unknown"
+        if res.message == "Challenge solved!":
+            result = "solved"
+        elif res.message == "Challenge not detected!":
+            result = "not_detected"
+        elif res.message.startswith("Error"):
+            result = "error"
+        REQUEST_COUNTER.labels(domain=domain, result=result).inc()
+    def parse_domain_url(url):
+        parsed_url = urllib.parse.urlparse(url)
+        return parsed_url.hostname
+    return wrapper

flaresolverr/build_package.py ADDED Viewed

	@@ -0,0 +1,126 @@

+import os
+import platform
+import shutil
+import subprocess
+import sys
+import zipfile
+import tarfile
+import requests
+def clean_files():
+    try:
+        shutil.rmtree(os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir, 'build'))
+    except Exception:
+        pass
+    try:
+        shutil.rmtree(os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir, 'dist'))
+    except Exception:
+        pass
+    try:
+        shutil.rmtree(os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir, 'dist_chrome'))
+    except Exception:
+        pass
+def download_chromium():
+    # https://commondatastorage.googleapis.com/chromium-browser-snapshots/index.html?prefix=Linux_x64/
+    revision = "1522586" if os.name == 'nt' else '1522586'
+    arch = 'Win_x64' if os.name == 'nt' else 'Linux_x64'
+    dl_file = 'chrome-win' if os.name == 'nt' else 'chrome-linux'
+    dl_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir, 'dist_chrome')
+    dl_path_folder = os.path.join(dl_path, dl_file)
+    dl_path_zip = dl_path_folder + '.zip'
+    # response = requests.get(
+    #     f'https://commondatastorage.googleapis.com/chromium-browser-snapshots/{arch}/LAST_CHANGE',
+    #     timeout=30)
+    # revision = response.text.strip()
+    print("Downloading revision: " + revision)
+    os.mkdir(dl_path)
+    with requests.get(
+            f'https://commondatastorage.googleapis.com/chromium-browser-snapshots/{arch}/{revision}/{dl_file}.zip',
+            stream=True) as r:
+        r.raise_for_status()
+        with open(dl_path_zip, 'wb') as f:
+            for chunk in r.iter_content(chunk_size=8192):
+                f.write(chunk)
+    print("File downloaded: " + dl_path_zip)
+    with zipfile.ZipFile(dl_path_zip, 'r') as zip_ref:
+        zip_ref.extractall(dl_path)
+    os.remove(dl_path_zip)
+    chrome_path = os.path.join(dl_path, "chrome")
+    shutil.move(dl_path_folder, chrome_path)
+    print("Extracted in: " + chrome_path)
+    if os.name != 'nt':
+        # Give executable permissions for *nix
+        # file * | grep executable | cut -d: -f1
+        print("Giving executable permissions...")
+        execs = ['chrome', 'chrome_crashpad_handler', 'chrome_sandbox', 'chrome-wrapper', 'xdg-mime', 'xdg-settings']
+        for exec_file in execs:
+            exec_path = os.path.join(chrome_path, exec_file)
+            os.chmod(exec_path, 0o755)
+def run_pyinstaller():
+    sep = ';' if os.name == 'nt' else ':'
+    result = subprocess.run([sys.executable, "-m", "PyInstaller",
+                             "--icon", "resources/flaresolverr_logo.ico",
+                             "--add-data", f"package.json{sep}.",
+                             "--add-data", f"{os.path.join('dist_chrome', 'chrome')}{sep}chrome",
+                             os.path.join("src", "flaresolverr.py")],
+                            cwd=os.pardir, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    if result.returncode != 0:
+        print(result.stderr.decode('utf-8'))
+        raise Exception("Error running pyInstaller")
+def compress_package():
+    dist_folder = os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir, 'dist')
+    package_folder = os.path.join(dist_folder, 'package')
+    shutil.move(os.path.join(dist_folder, 'flaresolverr'), os.path.join(package_folder, 'flaresolverr'))
+    print("Package folder: " + package_folder)
+    compr_format = 'zip' if os.name == 'nt' else 'gztar'
+    compr_file_name = 'flaresolverr_windows_x64' if os.name == 'nt' else 'flaresolverr_linux_x64'
+    compr_file_path = os.path.join(dist_folder, compr_file_name)
+    if compr_format == 'zip':
+        shutil.make_archive(compr_file_path, compr_format, package_folder)
+        print("Compressed file path: " + compr_file_path)
+    else:
+        def _reset_tarinfo(tarinfo):
+            tarinfo.uid = 0
+            tarinfo.gid = 0
+            tarinfo.uname = ""
+            tarinfo.gname = ""
+            return tarinfo
+        tar_path = compr_file_path + '.tar.gz'
+        with tarfile.open(tar_path, 'w:gz') as tar:
+            for entry in os.listdir(package_folder):
+                fullpath = os.path.join(package_folder, entry)
+                tar.add(fullpath, arcname=entry, filter=_reset_tarinfo)
+        print("Compressed file path: " + tar_path)
+if __name__ == "__main__":
+    print("Building package...")
+    print("Platform: " + platform.platform())
+    print("Cleaning previous build...")
+    clean_files()
+    print("Downloading Chromium...")
+    download_chromium()
+    print("Building pyinstaller executable... ")
+    run_pyinstaller()
+    print("Compressing package... ")
+    compress_package()
+# NOTE: python -m pip install pyinstaller

flaresolverr/dtos.py ADDED Viewed

	@@ -0,0 +1,94 @@

+STATUS_OK = "ok"
+STATUS_ERROR = "error"
+class ChallengeResolutionResultT:
+    url: str = None
+    status: int = None
+    headers: list = None
+    response: str = None
+    cookies: list = None
+    userAgent: str = None
+    screenshot: str | None = None
+    turnstile_token: str = None
+    def __init__(self, _dict):
+        self.__dict__.update(_dict)
+class ChallengeResolutionT:
+    status: str = None
+    message: str = None
+    result: ChallengeResolutionResultT = None
+    def __init__(self, _dict):
+        self.__dict__.update(_dict)
+        if self.result is not None:
+            self.result = ChallengeResolutionResultT(self.result)
+class V1RequestBase(object):
+    # V1RequestBase
+    cmd: str = None
+    cookies: list = None
+    maxTimeout: int = None
+    proxy: dict = None
+    session: str = None
+    session_ttl_minutes: int = None
+    headers: list = None  # deprecated v2.0.0, not used
+    userAgent: str = None  # deprecated v2.0.0, not used
+    # V1Request
+    url: str = None
+    postData: str = None
+    returnOnlyCookies: bool = None
+    returnScreenshot: bool = None
+    download: bool = None   # deprecated v2.0.0, not used
+    returnRawHtml: bool = None  # deprecated v2.0.0, not used
+    waitInSeconds: int = None
+    # Optional resource blocking flag (blocks images, CSS, and fonts)
+    disableMedia: bool = None
+    # Optional when you've got a turnstile captcha that needs to be clicked after X number of Tab presses
+    tabs_till_verify : int = None
+    def __init__(self, _dict):
+        self.__dict__.update(_dict)
+class V1ResponseBase(object):
+    # V1ResponseBase
+    status: str = None
+    message: str = None
+    session: str = None
+    sessions: list[str] = None
+    startTimestamp: int = None
+    endTimestamp: int = None
+    version: str = None
+    # V1ResponseSolution
+    solution: ChallengeResolutionResultT = None
+    # hidden vars
+    __error_500__: bool = False
+    def __init__(self, _dict):
+        self.__dict__.update(_dict)
+        if self.solution is not None:
+            self.solution = ChallengeResolutionResultT(self.solution)
+class IndexResponse(object):
+    msg: str = None
+    version: str = None
+    userAgent: str = None
+    def __init__(self, _dict):
+        self.__dict__.update(_dict)
+class HealthResponse(object):
+    status: str = None
+    def __init__(self, _dict):
+        self.__dict__.update(_dict)

flaresolverr/flaresolverr.py ADDED Viewed

	@@ -0,0 +1,155 @@

+import json
+import logging
+import os
+import sys
+import certifi
+from bottle import run, response, Bottle, request, ServerAdapter
+from bottle_plugins.error_plugin import error_plugin
+from bottle_plugins.logger_plugin import logger_plugin
+from bottle_plugins import prometheus_plugin
+from dtos import V1RequestBase
+import flaresolverr_service
+import utils
+env_proxy_url = os.environ.get('PROXY_URL', None)
+env_proxy_username = os.environ.get('PROXY_USERNAME', None)
+env_proxy_password = os.environ.get('PROXY_PASSWORD', None)
+class JSONErrorBottle(Bottle):
+    """
+    Handle 404 errors
+    """
+    def default_error_handler(self, res):
+        response.content_type = 'application/json'
+        return json.dumps(dict(error=res.body, status_code=res.status_code))
+app = JSONErrorBottle()
+@app.route('/')
+def index():
+    """
+    Show welcome message
+    """
+    res = flaresolverr_service.index_endpoint()
+    return utils.object_to_dict(res)
+@app.route('/health')
+def health():
+    """
+    Healthcheck endpoint.
+    This endpoint is special because it doesn't print traces
+    """
+    res = flaresolverr_service.health_endpoint()
+    return utils.object_to_dict(res)
+@app.post('/v1')
+def controller_v1():
+    """
+    Controller v1
+    """
+    data = request.json or {}
+    if (('proxy' not in data or not data.get('proxy')) and env_proxy_url is not None and (env_proxy_username is None and env_proxy_password is None)):
+        logging.info('Using proxy URL ENV')
+        data['proxy'] = {"url": env_proxy_url}
+    if (('proxy' not in data or not data.get('proxy')) and env_proxy_url is not None and (env_proxy_username is not None or env_proxy_password is not None)):
+        logging.info('Using proxy URL, username & password ENVs')
+        data['proxy'] = {"url": env_proxy_url, "username": env_proxy_username, "password": env_proxy_password}
+    req = V1RequestBase(data)
+    res = flaresolverr_service.controller_v1_endpoint(req)
+    if res.__error_500__:
+        response.status = 500
+    return utils.object_to_dict(res)
+# Script entry point: validates the runtime, configures logging, optionally tests the
+# browser installation, installs the Bottle plugins and starts the web server.
+if __name__ == "__main__":
+    # check python version
+    if sys.version_info < (3, 9):
+        raise Exception("The Python version is less than 3.9, a version equal to or higher is required.")
+    # fix for HEADLESS=false in Windows binary
+    # https://stackoverflow.com/a/27694505
+    if os.name == 'nt':
+        import multiprocessing
+        multiprocessing.freeze_support()
+    # fix ssl certificates for compiled binaries
+    # https://github.com/pyinstaller/pyinstaller/issues/7229
+    # https://stackoverflow.com/q/55736855
+    os.environ["REQUESTS_CA_BUNDLE"] = certifi.where()
+    os.environ["SSL_CERT_FILE"] = certifi.where()
+    # validate configuration
+    # LOG_LEVEL is upper-cased so it can be handed to logging.basicConfig as a level name
+    log_level = os.environ.get('LOG_LEVEL', 'info').upper()
+    log_file = os.environ.get('LOG_FILE', None)
+    # NOTE(review): log_html and headless are not read again in this block; presumably
+    # the getters are called here only to validate the configuration early - confirm
+    log_html = utils.get_config_log_html()
+    headless = utils.get_config_headless()
+    # HOST defaults to all interfaces and PORT to 8191
+    server_host = os.environ.get('HOST', '0.0.0.0')
+    server_port = int(os.environ.get('PORT', 8191))
+    # configure logger
+    logger_format = '%(asctime)s %(levelname)-8s %(message)s'
+    if log_level == 'DEBUG':
+        # in debug mode the thread id is logged as a request id
+        logger_format = '%(asctime)s %(levelname)-8s ReqId %(thread)s %(message)s'
+    if log_file:
+        # log to stdout and to the file, creating the log directory when needed
+        log_file = os.path.realpath(log_file)
+        log_path = os.path.dirname(log_file)
+        os.makedirs(log_path, exist_ok=True)
+        logging.basicConfig(
+            format=logger_format,
+            level=log_level,
+            datefmt='%Y-%m-%d %H:%M:%S',
+            handlers=[
+                logging.StreamHandler(sys.stdout),
+                logging.FileHandler(log_file)
+            ]
+        )
+    else:
+        # no LOG_FILE configured: log to stdout only
+        logging.basicConfig(
+            format=logger_format,
+            level=log_level,
+            datefmt='%Y-%m-%d %H:%M:%S',
+            handlers=[
+                logging.StreamHandler(sys.stdout)
+            ]
+        )
+    # disable warning traces from urllib3
+    logging.getLogger('urllib3').setLevel(logging.ERROR)
+    logging.getLogger('selenium.webdriver.remote.remote_connection').setLevel(logging.WARNING)
+    logging.getLogger('undetected_chromedriver').setLevel(logging.WARNING)
+    logging.info(f'FlareSolverr {utils.get_flaresolverr_version()}')
+    logging.debug('Debug log enabled')
+    # Get current OS for global variable
+    utils.get_current_platform()
+    # test browser installation
+    # the test is skipped only when SKIP_BROWSER_TEST is 'true' (case-insensitive)
+    if os.environ.get('SKIP_BROWSER_TEST', 'false').lower() != 'true':
+        flaresolverr_service.test_browser_installation()
+    else:
+        logging.info("Skipping browser installation test for faster boot.")
+    # start bottle plugins
+    # plugin order is important
+    app.install(logger_plugin)
+    app.install(error_plugin)
+    prometheus_plugin.setup()
+    app.install(prometheus_plugin.prometheus_plugin)
+    # start webserver
+    # default server 'wsgiref' does not support concurrent requests
+    # https://github.com/FlareSolverr/FlareSolverr/issues/680
+    # https://github.com/Pylons/waitress/issues/31
+    class WaitressServerPoll(ServerAdapter):
+        # Bottle server adapter that serves the app with waitress, passing asyncore_use_poll=True
+        def run(self, handler):
+            from waitress import serve
+            serve(handler, host=self.host, port=self.port, asyncore_use_poll=True)
+    run(app, host=server_host, port=server_port, quiet=True, server=WaitressServerPoll)

flaresolverr/flaresolverr_service.py ADDED Viewed

	@@ -0,0 +1,519 @@

+import logging
+import platform
+import sys
+import time
+from datetime import timedelta
+from html import escape
+from urllib.parse import unquote, quote
+from func_timeout import FunctionTimedOut, func_timeout
+from selenium.common import TimeoutException
+from selenium.webdriver.chrome.webdriver import WebDriver
+from selenium.webdriver.common.by import By
+from selenium.webdriver.common.keys import Keys
+from selenium.webdriver.support.expected_conditions import (
+    presence_of_element_located, staleness_of, title_is)
+from selenium.webdriver.common.action_chains import ActionChains
+from selenium.webdriver.support.wait import WebDriverWait
+import utils
+from dtos import (STATUS_ERROR, STATUS_OK, ChallengeResolutionResultT,
+                  ChallengeResolutionT, HealthResponse, IndexResponse,
+                  V1RequestBase, V1ResponseBase)
+from sessions import SessionsStorage
+# Page-title prefixes that mean the request was blocked (checked with str.startswith)
+ACCESS_DENIED_TITLES = [
+    # Cloudflare
+    'Access denied',
+    # Cloudflare http://bitturk.net/ Firefox
+    'Attention Required! | Cloudflare'
+]
+# CSS selectors whose presence on the page means the request was blocked
+ACCESS_DENIED_SELECTORS = [
+    # Cloudflare
+    'div.cf-error-title span.cf-code-label span',
+    # Cloudflare http://bitturk.net/ Firefox
+    '#cf-error-details div.cf-error-overview h1'
+]
+# Page titles of challenge pages (compared case-insensitively with the page title)
+CHALLENGE_TITLES = [
+    # Cloudflare
+    'Just a moment...',
+    # DDoS-GUARD
+    'DDoS-Guard'
+]
+# CSS selectors whose presence on the page means a challenge is still in progress
+CHALLENGE_SELECTORS = [
+    # Cloudflare
+    '#cf-challenge-running', '.ray_id', '.attack-box', '#cf-please-wait', '#challenge-spinner', '#trk_jschal_js', '#turnstile-wrapper', '.lds-ring',
+    # Custom CloudFlare for EbookParadijs, Film-Paleis, MuziekFabriek and Puur-Hollands
+    'td.info #js_info',
+    # Fairlane / pararius.com
+    'div.vc div.text-box h2'
+]
+# CSS selectors used to detect a Turnstile widget (the hidden response input)
+TURNSTILE_SELECTORS = [
+    "input[name='cf-turnstile-response']"
+]
+# Timeout handed to each WebDriverWait while polling for the challenge to finish
+SHORT_TIMEOUT = 1
+# Module-level storage shared by all session commands and session-bound requests
+SESSIONS_STORAGE = SessionsStorage()
+def test_browser_installation():
+    logging.info("Testing web browser installation...")
+    logging.info("Platform: " + platform.platform())
+    chrome_exe_path = utils.get_chrome_exe_path()
+    if chrome_exe_path is None:
+        logging.error("Chrome / Chromium web browser not installed!")
+        sys.exit(1)
+    else:
+        logging.info("Chrome / Chromium path: " + chrome_exe_path)
+    chrome_major_version = utils.get_chrome_major_version()
+    if chrome_major_version == '':
+        logging.error("Chrome / Chromium version not detected!")
+        sys.exit(1)
+    else:
+        logging.info("Chrome / Chromium major version: " + chrome_major_version)
+    logging.info("Launching web browser...")
+    user_agent = utils.get_user_agent()
+    logging.info("FlareSolverr User-Agent: " + user_agent)
+    logging.info("Test successful!")
+def index_endpoint() -> IndexResponse:
+    res = IndexResponse({})
+    res.msg = "FlareSolverr is ready!"
+    res.version = utils.get_flaresolverr_version()
+    res.userAgent = utils.get_user_agent()
+    return res
+def health_endpoint() -> HealthResponse:
+    res = HealthResponse({})
+    res.status = STATUS_OK
+    return res
+def controller_v1_endpoint(req: V1RequestBase) -> V1ResponseBase:
+    start_ts = int(time.time() * 1000)
+    logging.info(f"Incoming request => POST /v1 body: {utils.object_to_dict(req)}")
+    res: V1ResponseBase
+    try:
+        res = _controller_v1_handler(req)
+    except Exception as e:
+        res = V1ResponseBase({})
+        res.__error_500__ = True
+        res.status = STATUS_ERROR
+        res.message = "Error: " + str(e)
+        logging.error(res.message)
+    res.startTimestamp = start_ts
+    res.endTimestamp = int(time.time() * 1000)
+    res.version = utils.get_flaresolverr_version()
+    logging.debug(f"Response => POST /v1 body: {utils.object_to_dict(res)}")
+    logging.info(f"Response in {(res.endTimestamp - res.startTimestamp) / 1000} s")
+    return res
+def _controller_v1_handler(req: V1RequestBase) -> V1ResponseBase:
+    # do some validations
+    if req.cmd is None:
+        raise Exception("Request parameter 'cmd' is mandatory.")
+    if req.headers is not None:
+        logging.warning("Request parameter 'headers' was removed in FlareSolverr v2.")
+    if req.userAgent is not None:
+        logging.warning("Request parameter 'userAgent' was removed in FlareSolverr v2.")
+    # set default values
+    if req.maxTimeout is None or int(req.maxTimeout) < 1:
+        req.maxTimeout = 60000
+    # execute the command
+    res: V1ResponseBase
+    if req.cmd == 'sessions.create':
+        res = _cmd_sessions_create(req)
+    elif req.cmd == 'sessions.list':
+        res = _cmd_sessions_list(req)
+    elif req.cmd == 'sessions.destroy':
+        res = _cmd_sessions_destroy(req)
+    elif req.cmd == 'request.get':
+        res = _cmd_request_get(req)
+    elif req.cmd == 'request.post':
+        res = _cmd_request_post(req)
+    else:
+        raise Exception(f"Request parameter 'cmd' = '{req.cmd}' is invalid.")
+    return res
+def _cmd_request_get(req: V1RequestBase) -> V1ResponseBase:
+    # do some validations
+    if req.url is None:
+        raise Exception("Request parameter 'url' is mandatory in 'request.get' command.")
+    if req.postData is not None:
+        raise Exception("Cannot use 'postBody' when sending a GET request.")
+    if req.returnRawHtml is not None:
+        logging.warning("Request parameter 'returnRawHtml' was removed in FlareSolverr v2.")
+    if req.download is not None:
+        logging.warning("Request parameter 'download' was removed in FlareSolverr v2.")
+    challenge_res = _resolve_challenge(req, 'GET')
+    res = V1ResponseBase({})
+    res.status = challenge_res.status
+    res.message = challenge_res.message
+    res.solution = challenge_res.result
+    return res
+def _cmd_request_post(req: V1RequestBase) -> V1ResponseBase:
+    # do some validations
+    if req.postData is None:
+        raise Exception("Request parameter 'postData' is mandatory in 'request.post' command.")
+    if req.returnRawHtml is not None:
+        logging.warning("Request parameter 'returnRawHtml' was removed in FlareSolverr v2.")
+    if req.download is not None:
+        logging.warning("Request parameter 'download' was removed in FlareSolverr v2.")
+    challenge_res = _resolve_challenge(req, 'POST')
+    res = V1ResponseBase({})
+    res.status = challenge_res.status
+    res.message = challenge_res.message
+    res.solution = challenge_res.result
+    return res
+def _cmd_sessions_create(req: V1RequestBase) -> V1ResponseBase:
+    logging.debug("Creating new session...")
+    session, fresh = SESSIONS_STORAGE.create(session_id=req.session, proxy=req.proxy)
+    session_id = session.session_id
+    if not fresh:
+        return V1ResponseBase({
+            "status": STATUS_OK,
+            "message": "Session already exists.",
+            "session": session_id
+        })
+    return V1ResponseBase({
+        "status": STATUS_OK,
+        "message": "Session created successfully.",
+        "session": session_id
+    })
+def _cmd_sessions_list(req: V1RequestBase) -> V1ResponseBase:
+    session_ids = SESSIONS_STORAGE.session_ids()
+    return V1ResponseBase({
+        "status": STATUS_OK,
+        "message": "",
+        "sessions": session_ids
+    })
+def _cmd_sessions_destroy(req: V1RequestBase) -> V1ResponseBase:
+    session_id = req.session
+    existed = SESSIONS_STORAGE.destroy(session_id)
+    if not existed:
+        raise Exception("The session doesn't exist.")
+    return V1ResponseBase({
+        "status": STATUS_OK,
+        "message": "The session has been removed."
+    })
+def _resolve_challenge(req: V1RequestBase, method: str) -> ChallengeResolutionT:
+    timeout = int(req.maxTimeout) / 1000
+    driver = None
+    try:
+        if req.session:
+            session_id = req.session
+            ttl = timedelta(minutes=req.session_ttl_minutes) if req.session_ttl_minutes else None
+            session, fresh = SESSIONS_STORAGE.get(session_id, ttl)
+            if fresh:
+                logging.debug(f"new session created to perform the request (session_id={session_id})")
+            else:
+                logging.debug(f"existing session is used to perform the request (session_id={session_id}, "
+                              f"lifetime={str(session.lifetime())}, ttl={str(ttl)})")
+            driver = session.driver
+        else:
+            driver = utils.get_webdriver(req.proxy)
+            logging.debug('New instance of webdriver has been created to perform the request')
+        return func_timeout(timeout, _evil_logic, (req, driver, method))
+    except FunctionTimedOut:
+        raise Exception(f'Error solving the challenge. Timeout after {timeout} seconds.')
+    except Exception as e:
+        raise Exception('Error solving the challenge. ' + str(e).replace('\n', '\\n'))
+    finally:
+        if not req.session and driver is not None:
+            if utils.PLATFORM_VERSION == "nt":
+                driver.close()
+            driver.quit()
+            logging.debug('A used instance of webdriver has been destroyed')
+def click_verify(driver: WebDriver, num_tabs: int = 1):
+    """
+    Try to pass the Cloudflare verification with keyboard and mouse actions.
+    First sends num_tabs TAB keys followed by SPACE, then looks for a
+    'Verify you are human' button and clicks it. Both attempts are best-effort:
+    any exception is logged at debug level and ignored. Sleeps 2 seconds at the end.
+    :param driver: webdriver showing the challenge page
+    :param num_tabs: number of TAB key presses sent before SPACE
+    """
+    try:
+        logging.debug("Try to find the Cloudflare verify checkbox...")
+        actions = ActionChains(driver)
+        # pause before sending any key
+        actions.pause(5)
+        for _ in range(num_tabs):
+            actions.send_keys(Keys.TAB).pause(0.1)
+        actions.pause(1)
+        # the whole queued chain (pauses, TABs, SPACE) is executed by perform()
+        actions.send_keys(Keys.SPACE).perform()
+        logging.debug(f"Cloudflare verify checkbox clicked after {num_tabs} tabs!")
+    except Exception:
+        logging.debug("Cloudflare verify checkbox not found on the page.")
+    finally:
+        # always return to the top-level document
+        driver.switch_to.default_content()
+    try:
+        logging.debug("Try to find the Cloudflare 'Verify you are human' button...")
+        button = driver.find_element(
+            by=By.XPATH,
+            value="//input[@type='button' and @value='Verify you are human']",
+        )
+        if button:
+            actions = ActionChains(driver)
+            actions.move_to_element_with_offset(button, 5, 7)
+            actions.click(button)
+            actions.perform()
+            logging.debug("The Cloudflare 'Verify you are human' button found and clicked!")
+    except Exception:
+        logging.debug("The Cloudflare 'Verify you are human' button not found on the page.")
+    time.sleep(2)
+def _get_turnstile_token(driver: WebDriver, tabs: int):
+    token_input = driver.find_element(By.CSS_SELECTOR, "input[name='cf-turnstile-response']")
+    current_value = token_input.get_attribute("value")
+    while True:
+        click_verify(driver, num_tabs=tabs)
+        turnstile_token = token_input.get_attribute("value")
+        if turnstile_token:
+            if turnstile_token != current_value:
+                logging.info(f"Turnstile token: {turnstile_token}")
+                return turnstile_token
+        logging.debug(f"Failed to extract token possibly click failed")
+        # reset focus
+        driver.execute_script("""
+            let el = document.createElement('button');
+            el.style.position='fixed';
+            el.style.top='0';
+            el.style.left='0';
+            document.body.prepend(el);
+            el.focus();
+        """)
+        time.sleep(1)
+def _resolve_turnstile_captcha(req: V1RequestBase, driver: WebDriver):
+    turnstile_token = None
+    if req.tabs_till_verify is not None:
+        logging.debug(f'Navigating to... {req.url} in order to pass the turnstile challenge')
+        driver.get(req.url)
+        turnstile_challenge_found = False
+        for selector in TURNSTILE_SELECTORS:
+            found_elements = driver.find_elements(By.CSS_SELECTOR, selector)
+            if len(found_elements) > 0:
+                turnstile_challenge_found = True
+                logging.info("Turnstile challenge detected. Selector found: " + selector)
+                break
+        if turnstile_challenge_found:
+            turnstile_token = _get_turnstile_token(driver=driver, tabs=req.tabs_till_verify)
+        else:
+            logging.debug(f'Turnstile challenge not found')
+    return turnstile_token
+def _evil_logic(req: V1RequestBase, driver: WebDriver, method: str) -> ChallengeResolutionT:
+    """
+    Load the requested page in the browser, wait for any challenge to finish and
+    collect the result (url, cookies, User-Agent and optionally html / screenshot).
+    Steps: optionally block media resources, navigate (GET, POST or Turnstile flow),
+    set the request cookies and reload, fail fast on access-denied pages, detect a
+    challenge by title or selector, wait for it to disappear, build the result.
+    Raises Exception when an access-denied title or selector is found.
+    Note: the waiting loop has no timeout of its own; _resolve_challenge runs this
+    function under func_timeout.
+    """
+    res = ChallengeResolutionT({})
+    res.status = STATUS_OK
+    res.message = ""
+    # optionally block resources like images/css/fonts using CDP
+    # the per-request disableMedia value, when given, overrides the configured default
+    disable_media = utils.get_config_disable_media()
+    if req.disableMedia is not None:
+        disable_media = req.disableMedia
+    if disable_media:
+        # both lower-case and upper-case extensions are listed
+        block_urls = [
+            # Images
+            "*.png", "*.jpg", "*.jpeg", "*.gif", "*.webp", "*.bmp", "*.svg", "*.ico",
+            "*.PNG", "*.JPG", "*.JPEG", "*.GIF", "*.WEBP", "*.BMP", "*.SVG", "*.ICO",
+            "*.tiff", "*.tif", "*.jpe", "*.apng", "*.avif", "*.heic", "*.heif",
+            "*.TIFF", "*.TIF", "*.JPE", "*.APNG", "*.AVIF", "*.HEIC", "*.HEIF",
+            # Stylesheets
+            "*.css",
+            "*.CSS",
+            # Fonts
+            "*.woff", "*.woff2", "*.ttf", "*.otf", "*.eot",
+            "*.WOFF", "*.WOFF2", "*.TTF", "*.OTF", "*.EOT"
+        ]
+        try:
+            logging.debug("Network.setBlockedURLs: %s", block_urls)
+            driver.execute_cdp_cmd("Network.enable", {})
+            driver.execute_cdp_cmd("Network.setBlockedURLs", {"urls": block_urls})
+        except Exception:
+            # if CDP commands are not available or fail, ignore and continue
+            logging.debug("Network.setBlockedURLs failed or unsupported on this webdriver")
+    # navigate to the page
+    logging.debug(f"Navigating to... {req.url}")
+    turnstile_token = None
+    if method == "POST":
+        _post_request(req, driver)
+    else:
+        # the Turnstile flow is only used when tabs_till_verify is provided
+        if req.tabs_till_verify is None:
+            driver.get(req.url)
+        else:
+            turnstile_token = _resolve_turnstile_captcha(req, driver)
+    # set cookies if required
+    if req.cookies is not None and len(req.cookies) > 0:
+        logging.debug(f'Setting cookies...')
+        for cookie in req.cookies:
+            # replace any existing cookie with the same name
+            driver.delete_cookie(cookie['name'])
+            driver.add_cookie(cookie)
+        # reload the page
+        if method == 'POST':
+            _post_request(req, driver)
+        else:
+            driver.get(req.url)
+    # wait for the page
+    if utils.get_config_log_html():
+        logging.debug(f"Response HTML:\n{driver.page_source}")
+    # kept to detect the page being replaced once the challenge ends (staleness check below)
+    html_element = driver.find_element(By.TAG_NAME, "html")
+    page_title = driver.title
+    # find access denied titles
+    for title in ACCESS_DENIED_TITLES:
+        if page_title.startswith(title):
+            raise Exception('Cloudflare has blocked this request. '
+                            'Probably your IP is banned for this site, check in your web browser.')
+    # find access denied selectors
+    for selector in ACCESS_DENIED_SELECTORS:
+        found_elements = driver.find_elements(By.CSS_SELECTOR, selector)
+        if len(found_elements) > 0:
+            raise Exception('Cloudflare has blocked this request. '
+                            'Probably your IP is banned for this site, check in your web browser.')
+    # find challenge by title
+    challenge_found = False
+    for title in CHALLENGE_TITLES:
+        if title.lower() == page_title.lower():
+            challenge_found = True
+            logging.info("Challenge detected. Title found: " + page_title)
+            break
+    if not challenge_found:
+        # find challenge by selectors
+        for selector in CHALLENGE_SELECTORS:
+            found_elements = driver.find_elements(By.CSS_SELECTOR, selector)
+            if len(found_elements) > 0:
+                challenge_found = True
+                logging.info("Challenge detected. Selector found: " + selector)
+                break
+    attempt = 0
+    if challenge_found:
+        # retry until one full pass finds no challenge title and no challenge selector
+        while True:
+            try:
+                attempt = attempt + 1
+                # wait until the title changes
+                for title in CHALLENGE_TITLES:
+                    logging.debug("Waiting for title (attempt " + str(attempt) + "): " + title)
+                    WebDriverWait(driver, SHORT_TIMEOUT).until_not(title_is(title))
+                # then wait until all the selectors disappear
+                for selector in CHALLENGE_SELECTORS:
+                    logging.debug("Waiting for selector (attempt " + str(attempt) + "): " + selector)
+                    WebDriverWait(driver, SHORT_TIMEOUT).until_not(
+                        presence_of_element_located((By.CSS_SELECTOR, selector)))
+                # all elements not found
+                break
+            except TimeoutException:
+                logging.debug("Timeout waiting for selector")
+                # the challenge is still shown: try to click the verification and retry
+                click_verify(driver)
+                # update the html (cloudflare reloads the page every 5 s)
+                html_element = driver.find_element(By.TAG_NAME, "html")
+        # waits until cloudflare redirection ends
+        logging.debug("Waiting for redirect")
+        # noinspection PyBroadException
+        try:
+            WebDriverWait(driver, SHORT_TIMEOUT).until(staleness_of(html_element))
+        except Exception:
+            logging.debug("Timeout waiting for redirect")
+        logging.info("Challenge solved!")
+        res.message = "Challenge solved!"
+    else:
+        logging.info("Challenge not detected!")
+        res.message = "Challenge not detected!"
+    challenge_res = ChallengeResolutionResultT({})
+    challenge_res.url = driver.current_url
+    challenge_res.status = 200  # todo: fix, selenium not provides this info
+    challenge_res.cookies = driver.get_cookies()
+    challenge_res.userAgent = utils.get_user_agent(driver)
+    challenge_res.turnstile_token = turnstile_token
+    # headers and page source are only returned when the caller wants more than cookies
+    if not req.returnOnlyCookies:
+        challenge_res.headers = {}  # todo: fix, selenium not provides this info
+        if req.waitInSeconds and req.waitInSeconds > 0:
+            logging.info("Waiting " + str(req.waitInSeconds) + " seconds before returning the response...")
+            time.sleep(req.waitInSeconds)
+        challenge_res.response = driver.page_source
+    if req.returnScreenshot:
+        challenge_res.screenshot = driver.get_screenshot_as_base64()
+    res.result = challenge_res
+    return res
+def _post_request(req: V1RequestBase, driver: WebDriver):
+    post_form = f'<form id="hackForm" action="{req.url}" method="POST">'
+    query_string = req.postData if req.postData and req.postData[0] != '?' else req.postData[1:] if req.postData else ''
+    pairs = query_string.split('&')
+    for pair in pairs:
+        parts = pair.split('=', 1)
+        # noinspection PyBroadException
+        try:
+            name = unquote(parts[0])
+        except Exception:
+            name = parts[0]
+        if name == 'submit':
+            continue
+        # noinspection PyBroadException
+        try:
+            value = unquote(parts[1]) if len(parts) > 1 else ''
+        except Exception:
+            value = parts[1] if len(parts) > 1 else ''
+        # Protection of " character, for syntax
+        value=value.replace('"','&quot;')
+        post_form += f'<input type="text" name="{escape(quote(name))}" value="{escape(quote(value))}"><br>'
+    post_form += '</form>'
+    html_content = f"""
+        <!DOCTYPE html>
+        <html>
+        <body>
+            {post_form}
+            <script>document.getElementById('hackForm').submit();</script>
+        </body>
+        </html>"""
+    driver.get("data:text/html;charset=utf-8,{html_content}".format(html_content=html_content))

flaresolverr/metrics.py ADDED Viewed

	@@ -0,0 +1,32 @@

+import logging
+from prometheus_client import Counter, Histogram, start_http_server
+import time
+# Counter of handled requests, labelled by target domain and by result
+REQUEST_COUNTER = Counter(
+    name='flaresolverr_request',
+    documentation='Total requests with result',
+    labelnames=['domain', 'result']
+)
+# Histogram of request durations in seconds, labelled by target domain
+REQUEST_DURATION = Histogram(
+    name='flaresolverr_request_duration',
+    documentation='Request duration in seconds',
+    labelnames=['domain'],
+    buckets=[0, 10, 25, 50]
+)
+def serve(port):
+    """Start the Prometheus HTTP exporter on `port`, then block forever (never returns)."""
+    start_http_server(port=port)
+    # keep the calling thread alive indefinitely
+    while True:
+        time.sleep(600)
+def start_metrics_http_server(prometheus_port: int):
+    logging.info(f"Serving Prometheus exporter on http://0.0.0.0:{prometheus_port}/metrics")
+    from threading import Thread
+    Thread(
+        target=serve,
+        kwargs=dict(port=prometheus_port),
+        daemon=True,
+    ).start()

flaresolverr/sessions.py ADDED Viewed

	@@ -0,0 +1,84 @@

+import logging
+from dataclasses import dataclass
+from datetime import datetime, timedelta
+from typing import Optional, Tuple
+from uuid import uuid1
+from selenium.webdriver.chrome.webdriver import WebDriver
+import utils
+@dataclass
+class Session:
+    session_id: str
+    driver: WebDriver
+    created_at: datetime
+    def lifetime(self) -> timedelta:
+        return datetime.now() - self.created_at
+class SessionsStorage:
+    """SessionsStorage creates, stores and process all the sessions"""
+    def __init__(self):
+        self.sessions = {}
+    def create(self, session_id: Optional[str] = None, proxy: Optional[dict] = None,
+               force_new: Optional[bool] = False) -> Tuple[Session, bool]:
+        """create creates new instance of WebDriver if necessary,
+        assign defined (or newly generated) session_id to the instance
+        and returns the session object. If a new session has been created
+        second argument is set to True.
+        Note: The function is idempotent, so in case if session_id
+        already exists in the storage a new instance of WebDriver won't be created
+        and existing session will be returned. Second argument defines if
+        new session has been created (True) or an existing one was used (False).
+        """
+        session_id = session_id or str(uuid1())
+        if force_new:
+            self.destroy(session_id)
+        if self.exists(session_id):
+            return self.sessions[session_id], False
+        driver = utils.get_webdriver(proxy)
+        created_at = datetime.now()
+        session = Session(session_id, driver, created_at)
+        self.sessions[session_id] = session
+        return session, True
+    def exists(self, session_id: str) -> bool:
+        return session_id in self.sessions
+    def destroy(self, session_id: str) -> bool:
+        """destroy closes the driver instance and removes session from the storage.
+        The function is noop if session_id doesn't exist.
+        The function returns True if session was found and destroyed,
+        and False if session_id wasn't found.
+        """
+        if not self.exists(session_id):
+            return False
+        session = self.sessions.pop(session_id)
+        if utils.PLATFORM_VERSION == "nt":
+            session.driver.close()
+        session.driver.quit()
+        return True
+    def get(self, session_id: str, ttl: Optional[timedelta] = None) -> Tuple[Session, bool]:
+        session, fresh = self.create(session_id)
+        if ttl is not None and not fresh and session.lifetime() > ttl:
+            logging.debug(f'session\'s lifetime has expired, so the session is recreated (session_id={session_id})')
+            session, fresh = self.create(session_id, force_new=True)
+        return session, fresh
+    def session_ids(self) -> list[str]:
+        return list(self.sessions.keys())

flaresolverr/tests.py ADDED Viewed

	@@ -0,0 +1,655 @@

+import unittest
+from typing import Optional
+from webtest import TestApp
+from dtos import IndexResponse, HealthResponse, V1ResponseBase, STATUS_OK, STATUS_ERROR
+import flaresolverr
+import utils
+def _find_obj_by_key(key: str, value: str, _list: list) -> Optional[dict]:
+    for obj in _list:
+        if obj[key] == value:
+            return obj
+    return None
+class TestFlareSolverr(unittest.TestCase):
+    # Tests for the FlareSolverr HTTP API, driven through webtest against the Bottle app.
+    # proxies used by proxy-related tests (local addresses)
+    proxy_url = "http://127.0.0.1:8888"
+    proxy_socks_url = "socks5://127.0.0.1:1080"
+    # target sites used by the tests
+    google_url = "https://www.google.com"
+    post_url = "https://httpbin.org/post"
+    cloudflare_url = "https://nowsecure.nl/"
+    cloudflare_url_2 = "https://idope.se/torrent-list/harry/"
+    ddos_guard_url = "https://www.litres.ru/"
+    fairlane_url = "https://www.pararius.com/apartments/amsterdam"
+    custom_cloudflare_url = "https://www.muziekfabriek.org/"
+    cloudflare_blocked_url = "https://cpasbiens3.fr/index.php?do=search&subaction=search"
+    # WSGI test client shared by all tests (created once, at class definition time)
+    app = TestApp(flaresolverr.app)
+    # wait until the server is ready
+    app.get('/')
+    def test_wrong_endpoint(self):
+        # an unknown path must answer 404 with a JSON error body
+        res = self.app.get('/wrong', status=404)
+        self.assertEqual(res.status_code, 404)
+        body = res.json
+        self.assertEqual("Not found: '/wrong'", body['error'])
+        self.assertEqual(404, body['status_code'])
+    def test_index_endpoint(self):
+        # the index endpoint reports the ready message, the version and a Chrome User-Agent
+        res = self.app.get('/')
+        self.assertEqual(res.status_code, 200)
+        body = IndexResponse(res.json)
+        self.assertEqual("FlareSolverr is ready!", body.msg)
+        self.assertEqual(utils.get_flaresolverr_version(), body.version)
+        self.assertIn("Chrome/", body.userAgent)
+    def test_health_endpoint(self):
+        # the health endpoint answers 200 with status STATUS_OK
+        res = self.app.get('/health')
+        self.assertEqual(res.status_code, 200)
+        body = HealthResponse(res.json)
+        self.assertEqual(STATUS_OK, body.status)
+    def test_v1_endpoint_wrong_cmd(self):
+        # an unknown 'cmd' must answer 500 with an error body that still carries
+        # the timestamps and the version
+        res = self.app.post_json('/v1', {
+            "cmd": "request.bad",
+            "url": self.google_url
+        }, status=500)
+        self.assertEqual(res.status_code, 500)
+        body = V1ResponseBase(res.json)
+        self.assertEqual(STATUS_ERROR, body.status)
+        self.assertEqual("Error: Request parameter 'cmd' = 'request.bad' is invalid.", body.message)
+        self.assertGreater(body.startTimestamp, 10000)
+        self.assertGreaterEqual(body.endTimestamp, body.startTimestamp)
+        self.assertEqual(utils.get_flaresolverr_version(), body.version)
+    def test_v1_endpoint_request_get_no_cloudflare(self):
+        # a GET to a site without a challenge returns the page and reports
+        # "Challenge not detected!"
+        res = self.app.post_json('/v1', {
+            "cmd": "request.get",
+            "url": self.google_url
+        })
+        self.assertEqual(res.status_code, 200)
+        body = V1ResponseBase(res.json)
+        self.assertEqual(STATUS_OK, body.status)
+        self.assertEqual("Challenge not detected!", body.message)
+        self.assertGreater(body.startTimestamp, 10000)
+        self.assertGreaterEqual(body.endTimestamp, body.startTimestamp)
+        self.assertEqual(utils.get_flaresolverr_version(), body.version)
+        # the solution carries the final url, an empty headers dict, the html and cookies
+        solution = body.solution
+        self.assertIn(self.google_url, solution.url)
+        self.assertEqual(solution.status, 200)
+        self.assertIs(len(solution.headers), 0)
+        self.assertIn("<title>Google</title>", solution.response)
+        self.assertGreater(len(solution.cookies), 0)
+        self.assertIn("Chrome/", solution.userAgent)
+    def test_v1_endpoint_request_get_disable_resources(self):
+        # same as the plain GET test, but with "disableMedia" enabled in the request
+        res = self.app.post_json("/v1", {
+            "cmd": "request.get",
+            "url": self.google_url,
+            "disableMedia": True
+        })
+        self.assertEqual(res.status_code, 200)
+        body = V1ResponseBase(res.json)
+        self.assertEqual(STATUS_OK, body.status)
+        self.assertEqual("Challenge not detected!", body.message)
+        self.assertGreater(body.startTimestamp, 10000)
+        self.assertGreaterEqual(body.endTimestamp, body.startTimestamp)
+        self.assertEqual(utils.get_flaresolverr_version(), body.version)
+        # the page must still load and be returned with media blocking requested
+        solution = body.solution
+        self.assertIn(self.google_url, solution.url)
+        self.assertEqual(solution.status, 200)
+        self.assertIs(len(solution.headers), 0)
+        self.assertIn("<title>Google</title>", solution.response)
+        self.assertGreater(len(solution.cookies), 0)
+        self.assertIn("Chrome/", solution.userAgent)
+    def test_v1_endpoint_request_get_cloudflare_js_1(self):
+        # a GET to the first Cloudflare-protected test site must report "Challenge solved!"
+        res = self.app.post_json('/v1', {
+            "cmd": "request.get",
+            "url": self.cloudflare_url
+        })
+        self.assertEqual(res.status_code, 200)
+        body = V1ResponseBase(res.json)
+        self.assertEqual(STATUS_OK, body.status)
+        self.assertEqual("Challenge solved!", body.message)
+        self.assertGreater(body.startTimestamp, 10000)
+        self.assertGreaterEqual(body.endTimestamp, body.startTimestamp)
+        self.assertEqual(utils.get_flaresolverr_version(), body.version)
+        solution = body.solution
+        self.assertIn(self.cloudflare_url, solution.url)
+        self.assertEqual(solution.status, 200)
+        self.assertIs(len(solution.headers), 0)
+        self.assertIn("<title>nowSecure</title>", solution.response)
+        self.assertGreater(len(solution.cookies), 0)
+        self.assertIn("Chrome/", solution.userAgent)
+        # a solved challenge must yield a cf_clearance cookie with a non-trivial value
+        cf_cookie = _find_obj_by_key("name", "cf_clearance", solution.cookies)
+        self.assertIsNotNone(cf_cookie, "Cloudflare cookie not found")
+        self.assertGreater(len(cf_cookie["value"]), 30)
+    def test_v1_endpoint_request_get_cloudflare_js_2(self):
+        # a GET to the second Cloudflare-protected test site must report "Challenge solved!"
+        res = self.app.post_json('/v1', {
+            "cmd": "request.get",
+            "url": self.cloudflare_url_2
+        })
+        self.assertEqual(res.status_code, 200)
+        body = V1ResponseBase(res.json)
+        self.assertEqual(STATUS_OK, body.status)
+        self.assertEqual("Challenge solved!", body.message)
+        self.assertGreater(body.startTimestamp, 10000)
+        self.assertGreaterEqual(body.endTimestamp, body.startTimestamp)
+        self.assertEqual(utils.get_flaresolverr_version(), body.version)
+        solution = body.solution
+        self.assertIn(self.cloudflare_url_2, solution.url)
+        self.assertEqual(solution.status, 200)
+        self.assertIs(len(solution.headers), 0)
+        self.assertIn("<title>harry - idope torrent search</title>", solution.response)
+        self.assertGreater(len(solution.cookies), 0)
+        self.assertIn("Chrome/", solution.userAgent)
+        # a solved challenge must yield a cf_clearance cookie with a non-trivial value
+        cf_cookie = _find_obj_by_key("name", "cf_clearance", solution.cookies)
+        self.assertIsNotNone(cf_cookie, "Cloudflare cookie not found")
+        self.assertGreater(len(cf_cookie["value"]), 30)
+    def test_v1_endpoint_request_get_ddos_guard_js(self):
+        res = self.app.post_json('/v1', {
+            "cmd": "request.get",
+            "url": self.ddos_guard_url
+        })
+        self.assertEqual(res.status_code, 200)
+        body = V1ResponseBase(res.json)
+        self.assertEqual(STATUS_OK, body.status)
+        self.assertEqual("Challenge solved!", body.message)
+        self.assertGreater(body.startTimestamp, 10000)
+        self.assertGreaterEqual(body.endTimestamp, body.startTimestamp)
+        self.assertEqual(utils.get_flaresolverr_version(), body.version)
+        solution = body.solution
+        self.assertIn(self.ddos_guard_url, solution.url)
+        self.assertEqual(solution.status, 200)
+        self.assertIs(len(solution.headers), 0)
+        self.assertIn("<title>Литрес", solution.response)
+        self.assertGreater(len(solution.cookies), 0)
+        self.assertIn("Chrome/", solution.userAgent)
+        cf_cookie = _find_obj_by_key("name", "__ddg1_", solution.cookies)
+        self.assertIsNotNone(cf_cookie, "DDOS-Guard cookie not found")
+        self.assertGreater(len(cf_cookie["value"]), 10)
+    def test_v1_endpoint_request_get_fairlane_js(self):
+        res = self.app.post_json('/v1', {
+            "cmd": "request.get",
+            "url": self.fairlane_url
+        })
+        self.assertEqual(res.status_code, 200)
+        body = V1ResponseBase(res.json)
+        self.assertEqual(STATUS_OK, body.status)
+        self.assertEqual("Challenge solved!", body.message)
+        self.assertGreater(body.startTimestamp, 10000)
+        self.assertGreaterEqual(body.endTimestamp, body.startTimestamp)
+        self.assertEqual(utils.get_flaresolverr_version(), body.version)
+        solution = body.solution
+        self.assertIn(self.fairlane_url, solution.url)
+        self.assertEqual(solution.status, 200)
+        self.assertIs(len(solution.headers), 0)
+        self.assertIn("<title>Rental Apartments Amsterdam</title>", solution.response)
+        self.assertGreater(len(solution.cookies), 0)
+        self.assertIn("Chrome/", solution.userAgent)
+        cf_cookie = _find_obj_by_key("name", "fl_pass_v2_b", solution.cookies)
+        self.assertIsNotNone(cf_cookie, "Fairlane cookie not found")
+        self.assertGreater(len(cf_cookie["value"]), 50)
+    def test_v1_endpoint_request_get_custom_cloudflare_js(self):
+        res = self.app.post_json('/v1', {
+            "cmd": "request.get",
+            "url": self.custom_cloudflare_url
+        })
+        self.assertEqual(res.status_code, 200)
+        body = V1ResponseBase(res.json)
+        self.assertEqual(STATUS_OK, body.status)
+        self.assertEqual("Challenge solved!", body.message)
+        self.assertGreater(body.startTimestamp, 10000)
+        self.assertGreaterEqual(body.endTimestamp, body.startTimestamp)
+        self.assertEqual(utils.get_flaresolverr_version(), body.version)
+        solution = body.solution
+        self.assertIn(self.custom_cloudflare_url, solution.url)
+        self.assertEqual(solution.status, 200)
+        self.assertIs(len(solution.headers), 0)
+        self.assertIn("<title>MuziekFabriek : Aanmelden</title>", solution.response)
+        self.assertGreater(len(solution.cookies), 0)
+        self.assertIn("Chrome/", solution.userAgent)
+        cf_cookie = _find_obj_by_key("name", "ct_anti_ddos_key", solution.cookies)
+        self.assertIsNotNone(cf_cookie, "Custom Cloudflare cookie not found")
+        self.assertGreater(len(cf_cookie["value"]), 10)
+    # todo: test Cmd 'request.get' should return fail with Cloudflare CAPTCHA
+    def test_v1_endpoint_request_get_cloudflare_blocked(self):
+        res = self.app.post_json('/v1', {
+            "cmd": "request.get",
+            "url": self.cloudflare_blocked_url
+        }, status=500)
+        self.assertEqual(res.status_code, 500)
+        body = V1ResponseBase(res.json)
+        self.assertEqual(STATUS_ERROR, body.status)
+        self.assertEqual("Error: Error solving the challenge. Cloudflare has blocked this request. "
+                         "Probably your IP is banned for this site, check in your web browser.", body.message)
+        self.assertGreater(body.startTimestamp, 10000)
+        self.assertGreaterEqual(body.endTimestamp, body.startTimestamp)
+        self.assertEqual(utils.get_flaresolverr_version(), body.version)
+    def test_v1_endpoint_request_get_cookies_param(self):
+        res = self.app.post_json('/v1', {
+            "cmd": "request.get",
+            "url": self.google_url,
+            "cookies": [
+                {
+                    "name": "testcookie1",
+                    "value": "testvalue1"
+                },
+                {
+                    "name": "testcookie2",
+                    "value": "testvalue2"
+                }
+            ]
+        })
+        self.assertEqual(res.status_code, 200)
+        body = V1ResponseBase(res.json)
+        self.assertEqual(STATUS_OK, body.status)
+        self.assertEqual("Challenge not detected!", body.message)
+        self.assertGreater(body.startTimestamp, 10000)
+        self.assertGreaterEqual(body.endTimestamp, body.startTimestamp)
+        self.assertEqual(utils.get_flaresolverr_version(), body.version)
+        solution = body.solution
+        self.assertIn(self.google_url, solution.url)
+        self.assertEqual(solution.status, 200)
+        self.assertIs(len(solution.headers), 0)
+        self.assertIn("<title>Google</title>", solution.response)
+        self.assertGreater(len(solution.cookies), 1)
+        self.assertIn("Chrome/", solution.userAgent)
+        user_cookie1 = _find_obj_by_key("name", "testcookie1", solution.cookies)
+        self.assertIsNotNone(user_cookie1, "User cookie 1 not found")
+        self.assertEqual("testvalue1", user_cookie1["value"])
+        user_cookie2 = _find_obj_by_key("name", "testcookie2", solution.cookies)
+        self.assertIsNotNone(user_cookie2, "User cookie 2 not found")
+        self.assertEqual("testvalue2", user_cookie2["value"])
+    def test_v1_endpoint_request_get_returnOnlyCookies_param(self):
+        res = self.app.post_json('/v1', {
+            "cmd": "request.get",
+            "url": self.google_url,
+            "returnOnlyCookies": True
+        })
+        self.assertEqual(res.status_code, 200)
+        body = V1ResponseBase(res.json)
+        self.assertEqual(STATUS_OK, body.status)
+        self.assertEqual("Challenge not detected!", body.message)
+        self.assertGreater(body.startTimestamp, 10000)
+        self.assertGreaterEqual(body.endTimestamp, body.startTimestamp)
+        self.assertEqual(utils.get_flaresolverr_version(), body.version)
+        solution = body.solution
+        self.assertIn(self.google_url, solution.url)
+        self.assertEqual(solution.status, 200)
+        self.assertIsNone(solution.headers)
+        self.assertIsNone(solution.response)
+        self.assertGreater(len(solution.cookies), 0)
+        self.assertIn("Chrome/", solution.userAgent)
+    def test_v1_endpoint_request_get_proxy_http_param(self):
+        """
+        To configure TinyProxy in local:
+           * sudo vim /etc/tinyproxy/tinyproxy.conf
+              * edit => LogFile "/tmp/tinyproxy.log"
+              * edit => Syslog Off
+           * sudo tinyproxy -d
+           * sudo tail -f /tmp/tinyproxy.log
+        """
+        res = self.app.post_json('/v1', {
+            "cmd": "request.get",
+            "url": self.google_url,
+            "proxy": {
+                "url": self.proxy_url
+            }
+        })
+        self.assertEqual(res.status_code, 200)
+        body = V1ResponseBase(res.json)
+        self.assertEqual(STATUS_OK, body.status)
+        self.assertEqual("Challenge not detected!", body.message)
+        self.assertGreater(body.startTimestamp, 10000)
+        self.assertGreaterEqual(body.endTimestamp, body.startTimestamp)
+        self.assertEqual(utils.get_flaresolverr_version(), body.version)
+        solution = body.solution
+        self.assertIn(self.google_url, solution.url)
+        self.assertEqual(solution.status, 200)
+        self.assertIs(len(solution.headers), 0)
+        self.assertIn("<title>Google</title>", solution.response)
+        self.assertGreater(len(solution.cookies), 0)
+        self.assertIn("Chrome/", solution.userAgent)
+    def test_v1_endpoint_request_get_proxy_http_param_with_credentials(self):
+        """
+        To configure TinyProxy in local:
+           * sudo vim /etc/tinyproxy/tinyproxy.conf
+              * edit => LogFile "/tmp/tinyproxy.log"
+              * edit => Syslog Off
+              * add => BasicAuth testuser testpass
+           * sudo tinyproxy -d
+           * sudo tail -f /tmp/tinyproxy.log
+        """
+        res = self.app.post_json('/v1', {
+            "cmd": "request.get",
+            "url": self.google_url,
+            "proxy": {
+                "url": self.proxy_url,
+                "username": "testuser",
+                "password": "testpass"
+            }
+        })
+        self.assertEqual(res.status_code, 200)
+        body = V1ResponseBase(res.json)
+        self.assertEqual(STATUS_OK, body.status)
+        self.assertEqual("Challenge not detected!", body.message)
+        self.assertGreater(body.startTimestamp, 10000)
+        self.assertGreaterEqual(body.endTimestamp, body.startTimestamp)
+        self.assertEqual(utils.get_flaresolverr_version(), body.version)
+        solution = body.solution
+        self.assertIn(self.google_url, solution.url)
+        self.assertEqual(solution.status, 200)
+        self.assertIs(len(solution.headers), 0)
+        self.assertIn("<title>Google</title>", solution.response)
+        self.assertGreater(len(solution.cookies), 0)
+        self.assertIn("Chrome/", solution.userAgent)
+    def test_v1_endpoint_request_get_proxy_socks_param(self):
+        """
+        To configure Dante in local:
+           * https://linuxhint.com/set-up-a-socks5-proxy-on-ubuntu-with-dante/
+           * sudo vim /etc/sockd.conf
+           * sudo systemctl restart sockd.service
+           * curl --socks5 socks5://127.0.0.1:1080 https://www.google.com
+        """
+        res = self.app.post_json('/v1', {
+            "cmd": "request.get",
+            "url": self.google_url,
+            "proxy": {
+                "url": self.proxy_socks_url
+            }
+        })
+        self.assertEqual(res.status_code, 200)
+        body = V1ResponseBase(res.json)
+        self.assertEqual(STATUS_OK, body.status)
+        self.assertEqual("Challenge not detected!", body.message)
+        self.assertGreater(body.startTimestamp, 10000)
+        self.assertGreaterEqual(body.endTimestamp, body.startTimestamp)
+        self.assertEqual(utils.get_flaresolverr_version(), body.version)
+        solution = body.solution
+        self.assertIn(self.google_url, solution.url)
+        self.assertEqual(solution.status, 200)
+        self.assertIs(len(solution.headers), 0)
+        self.assertIn("<title>Google</title>", solution.response)
+        self.assertGreater(len(solution.cookies), 0)
+        self.assertIn("Chrome/", solution.userAgent)
+    def test_v1_endpoint_request_get_proxy_wrong_param(self):
+        res = self.app.post_json('/v1', {
+            "cmd": "request.get",
+            "url": self.google_url,
+            "proxy": {
+                "url": "http://127.0.0.1:43210"
+            }
+        }, status=500)
+        self.assertEqual(res.status_code, 500)
+        body = V1ResponseBase(res.json)
+        self.assertEqual(STATUS_ERROR, body.status)
+        self.assertIn("Error: Error solving the challenge. Message: unknown error: net::ERR_PROXY_CONNECTION_FAILED",
+                      body.message)
+        self.assertGreater(body.startTimestamp, 10000)
+        self.assertGreaterEqual(body.endTimestamp, body.startTimestamp)
+        self.assertEqual(utils.get_flaresolverr_version(), body.version)
+    def test_v1_endpoint_request_get_fail_timeout(self):
+        res = self.app.post_json('/v1', {
+            "cmd": "request.get",
+            "url": self.google_url,
+            "maxTimeout": 10
+        }, status=500)
+        self.assertEqual(res.status_code, 500)
+        body = V1ResponseBase(res.json)
+        self.assertEqual(STATUS_ERROR, body.status)
+        self.assertEqual("Error: Error solving the challenge. Timeout after 0.01 seconds.", body.message)
+        self.assertGreater(body.startTimestamp, 10000)
+        self.assertGreaterEqual(body.endTimestamp, body.startTimestamp)
+        self.assertEqual(utils.get_flaresolverr_version(), body.version)
+    def test_v1_endpoint_request_get_fail_bad_domain(self):
+        res = self.app.post_json('/v1', {
+            "cmd": "request.get",
+            "url": "https://www.google.combad"
+        }, status=500)
+        self.assertEqual(res.status_code, 500)
+        body = V1ResponseBase(res.json)
+        self.assertEqual(STATUS_ERROR, body.status)
+        self.assertIn("Message: unknown error: net::ERR_NAME_NOT_RESOLVED", body.message)
+    def test_v1_endpoint_request_get_deprecated_param(self):
+        res = self.app.post_json('/v1', {
+            "cmd": "request.get",
+            "url": self.google_url,
+            "userAgent": "Test User-Agent"  # was removed in v2, not used
+        })
+        self.assertEqual(res.status_code, 200)
+        body = V1ResponseBase(res.json)
+        self.assertEqual(STATUS_OK, body.status)
+        self.assertEqual("Challenge not detected!", body.message)
+    def test_v1_endpoint_request_post_no_cloudflare(self):
+        res = self.app.post_json('/v1', {
+            "cmd": "request.post",
+            "url": self.post_url,
+            "postData": "param1=value1&param2=value2"
+        })
+        self.assertEqual(res.status_code, 200)
+        body = V1ResponseBase(res.json)
+        self.assertEqual(STATUS_OK, body.status)
+        self.assertEqual("Challenge not detected!", body.message)
+        self.assertGreater(body.startTimestamp, 10000)
+        self.assertGreaterEqual(body.endTimestamp, body.startTimestamp)
+        self.assertEqual(utils.get_flaresolverr_version(), body.version)
+        solution = body.solution
+        self.assertIn(self.post_url, solution.url)
+        self.assertEqual(solution.status, 200)
+        self.assertIs(len(solution.headers), 0)
+        self.assertIn('"form": {\n    "param1": "value1", \n    "param2": "value2"\n  }', solution.response)
+        self.assertEqual(len(solution.cookies), 0)
+        self.assertIn("Chrome/", solution.userAgent)
+    def test_v1_endpoint_request_post_cloudflare(self):
+        res = self.app.post_json('/v1', {
+            "cmd": "request.post",
+            "url": self.cloudflare_url,
+            "postData": "param1=value1&param2=value2"
+        })
+        self.assertEqual(res.status_code, 200)
+        body = V1ResponseBase(res.json)
+        self.assertEqual(STATUS_OK, body.status)
+        self.assertEqual("Challenge solved!", body.message)
+        self.assertGreater(body.startTimestamp, 10000)
+        self.assertGreaterEqual(body.endTimestamp, body.startTimestamp)
+        self.assertEqual(utils.get_flaresolverr_version(), body.version)
+        solution = body.solution
+        self.assertIn(self.cloudflare_url, solution.url)
+        self.assertEqual(solution.status, 200)
+        self.assertIs(len(solution.headers), 0)
+        self.assertIn("<title>405 Not Allowed</title>", solution.response)
+        self.assertGreater(len(solution.cookies), 0)
+        self.assertIn("Chrome/", solution.userAgent)
+        cf_cookie = _find_obj_by_key("name", "cf_clearance", solution.cookies)
+        self.assertIsNotNone(cf_cookie, "Cloudflare cookie not found")
+        self.assertGreater(len(cf_cookie["value"]), 30)
+    def test_v1_endpoint_request_post_fail_no_post_data(self):
+        res = self.app.post_json('/v1', {
+            "cmd": "request.post",
+            "url": self.google_url
+        }, status=500)
+        self.assertEqual(res.status_code, 500)
+        body = V1ResponseBase(res.json)
+        self.assertEqual(STATUS_ERROR, body.status)
+        self.assertIn("Request parameter 'postData' is mandatory in 'request.post' command", body.message)
+    def test_v1_endpoint_request_post_deprecated_param(self):
+        res = self.app.post_json('/v1', {
+            "cmd": "request.post",
+            "url": self.google_url,
+            "postData": "param1=value1&param2=value2",
+            "userAgent": "Test User-Agent"  # was removed in v2, not used
+        })
+        self.assertEqual(res.status_code, 200)
+        body = V1ResponseBase(res.json)
+        self.assertEqual(STATUS_OK, body.status)
+        self.assertEqual("Challenge not detected!", body.message)
+    def test_v1_endpoint_sessions_create_without_session(self):
+        res = self.app.post_json('/v1', {
+            "cmd": "sessions.create"
+        })
+        self.assertEqual(res.status_code, 200)
+        body = V1ResponseBase(res.json)
+        self.assertEqual(STATUS_OK, body.status)
+        self.assertEqual("Session created successfully.", body.message)
+        self.assertIsNotNone(body.session)
+    def test_v1_endpoint_sessions_create_with_session(self):
+        res = self.app.post_json('/v1', {
+            "cmd": "sessions.create",
+            "session": "test_create_session"
+        })
+        self.assertEqual(res.status_code, 200)
+        body = V1ResponseBase(res.json)
+        self.assertEqual(STATUS_OK, body.status)
+        self.assertEqual("Session created successfully.", body.message)
+        self.assertEqual(body.session, "test_create_session")
+    def test_v1_endpoint_sessions_create_with_proxy(self):
+        res = self.app.post_json('/v1', {
+            "cmd": "sessions.create",
+            "proxy": {
+                "url": self.proxy_url
+            }
+        })
+        self.assertEqual(res.status_code, 200)
+        body = V1ResponseBase(res.json)
+        self.assertEqual(STATUS_OK, body.status)
+        self.assertEqual("Session created successfully.", body.message)
+        self.assertIsNotNone(body.session)
+    def test_v1_endpoint_sessions_list(self):
+        self.app.post_json('/v1', {
+            "cmd": "sessions.create",
+            "session": "test_list_sessions"
+        })
+        res = self.app.post_json('/v1', {
+            "cmd": "sessions.list"
+        })
+        self.assertEqual(res.status_code, 200)
+        body = V1ResponseBase(res.json)
+        self.assertEqual(STATUS_OK, body.status)
+        self.assertEqual("", body.message)
+        self.assertGreaterEqual(len(body.sessions), 1)
+        self.assertIn("test_list_sessions", body.sessions)
+    def test_v1_endpoint_sessions_destroy_existing_session(self):
+        self.app.post_json('/v1', {
+            "cmd": "sessions.create",
+            "session": "test_destroy_sessions"
+        })
+        res = self.app.post_json('/v1', {
+            "cmd": "sessions.destroy",
+            "session": "test_destroy_sessions"
+        })
+        self.assertEqual(res.status_code, 200)
+        body = V1ResponseBase(res.json)
+        self.assertEqual(STATUS_OK, body.status)
+        self.assertEqual("The session has been removed.", body.message)
+    def test_v1_endpoint_sessions_destroy_non_existing_session(self):
+        res = self.app.post_json('/v1', {
+            "cmd": "sessions.destroy",
+            "session": "non_existing_session_name"
+        }, status=500)
+        self.assertEqual(res.status_code, 500)
+        body = V1ResponseBase(res.json)
+        self.assertEqual(STATUS_ERROR, body.status)
+        self.assertEqual("Error: The session doesn't exist.", body.message)
+    def test_v1_endpoint_request_get_with_session(self):
+        self.app.post_json('/v1', {
+            "cmd": "sessions.create",
+            "session": "test_request_sessions"
+        })
+        res = self.app.post_json('/v1', {
+            "cmd": "request.get",
+            "session": "test_request_sessions",
+            "url": self.google_url
+        })
+        self.assertEqual(res.status_code, 200)
+        body = V1ResponseBase(res.json)
+        self.assertEqual(STATUS_OK, body.status)
+if __name__ == '__main__':
+    unittest.main()

flaresolverr/tests_sites.py ADDED Viewed

	@@ -0,0 +1,102 @@

+import unittest
+from webtest import TestApp
+from dtos import V1ResponseBase, STATUS_OK
+import flaresolverr
+import utils
+def _find_obj_by_key(key: str, value: str, _list: list) -> dict | None:
+    for obj in _list:
+        if obj[key] == value:
+            return obj
+    return None
+def asset_cloudflare_solution(self, res, site_url, site_text):
+    self.assertEqual(res.status_code, 200)
+    body = V1ResponseBase(res.json)
+    self.assertEqual(STATUS_OK, body.status)
+    self.assertEqual("Challenge solved!", body.message)
+    self.assertGreater(body.startTimestamp, 10000)
+    self.assertGreaterEqual(body.endTimestamp, body.startTimestamp)
+    self.assertEqual(utils.get_flaresolverr_version(), body.version)
+    solution = body.solution
+    self.assertIn(site_url, solution.url)
+    self.assertEqual(solution.status, 200)
+    self.assertIs(len(solution.headers), 0)
+    self.assertIn(site_text, solution.response)
+    self.assertGreater(len(solution.cookies), 0)
+    self.assertIn("Chrome/", solution.userAgent)
+    cf_cookie = _find_obj_by_key("name", "cf_clearance", solution.cookies)
+    self.assertIsNotNone(cf_cookie, "Cloudflare cookie not found")
+    self.assertGreater(len(cf_cookie["value"]), 30)
+class TestFlareSolverr(unittest.TestCase):
+    app = TestApp(flaresolverr.app)
+    # wait until the server is ready
+    app.get('/')
+    def test_v1_endpoint_request_get_cloudflare(self):
+        sites_get = [
+            ('nowsecure', 'https://nowsecure.nl', '<title>nowSecure</title>'),
+            ('0magnet', 'https://0magnet.com/search?q=2022', 'Torrent Search - ØMagnet'),
+            ('1337x', 'https://1337x.unblockit.cat/cat/Movies/time/desc/1/', ''),
+            ('avistaz', 'https://avistaz.to/api/v1/jackett/torrents?in=1&type=0&search=',
+             '<title>Access denied</title>'),
+            ('badasstorrents', 'https://badasstorrents.com/torrents/search/720p/date/desc',
+             '<title>Latest Torrents - BadassTorrents</title>'),
+            ('bt4g', 'https://bt4g.org/search/2022', '<title>Download 2022 Torrents - BT4G</title>'),
+            ('cinemaz', 'https://cinemaz.to/api/v1/jackett/torrents?in=1&type=0&search=',
+             '<title>Access denied</title>'),
+            ('epublibre', 'https://epublibre.unblockit.cat/catalogo/index/0/nuevo/todos/sin/todos/--/ajax',
+             '<title>epublibre - catálogo</title>'),
+            ('ext', 'https://ext.to/latest/?order=age&sort=desc',
+             '<title>Download Latest Torrents - EXT Torrents</title>'),
+            ('extratorrent', 'https://extratorrent.st/search/?srt=added&order=desc&search=720p&new=1&x=0&y=0',
+             'Page 1 - ExtraTorrent'),
+            ('idope', 'https://idope.se/browse.html', '<title>Recent Torrents</title>'),
+            ('limetorrents', 'https://limetorrents.unblockninja.com/latest100',
+             '<title>Latest 100 torrents - LimeTorrents</title>'),
+            ('privatehd', 'https://privatehd.to/api/v1/jackett/torrents?in=1&type=0&search=',
+             '<title>Access denied</title>'),
+            ('torrentcore', 'https://torrentcore.xyz/index', '<title>Torrent[CORE] - Torrent community.</title>'),
+            ('torrentqq223', 'https://torrentqq223.com/torrent/newest.html', 'https://torrentqq223.com/ads/'),
+            ('36dm', 'https://www.36dm.club/1.html', 'https://www.36dm.club/yesterday-1.html'),
+            ('erai-raws', 'https://www.erai-raws.info/feed/?type=magnet', '403 Forbidden'),
+            ('teamos', 'https://www.teamos.xyz/torrents/?filename=&freeleech=',
+             '<title>Log in | Team OS : Your Only Destination To Custom OS !!</title>'),
+            ('yts', 'https://yts.unblockninja.com/api/v2/list_movies.json?query_term=&limit=50&sort=date_added',
+             '{"movie_count":')
+        ]
+        for site_name, site_url, site_text in sites_get:
+            with self.subTest(msg=site_name):
+                res = self.app.post_json('/v1', {
+                    "cmd": "request.get",
+                    "url": site_url
+                })
+                asset_cloudflare_solution(self, res, site_url, site_text)
+    def test_v1_endpoint_request_post_cloudflare(self):
+        sites_post = [
+            ('nnmclub', 'https://nnmclub.to/forum/tracker.php', '<title>Трекер :: NNM-Club</title>',
+             'prev_sd=0&prev_a=0&prev_my=0&prev_n=0&prev_shc=0&prev_shf=1&prev_sha=1&prev_shs=0&prev_shr=0&prev_sht=0&f%5B%5D=-1&o=1&s=2&tm=-1&shf=1&sha=1&ta=-1&sns=-1&sds=-1&nm=&pn=&submit=%CF%EE%E8%F1%EA')
+        ]
+        for site_name, site_url, site_text, post_data in sites_post:
+            with self.subTest(msg=site_name):
+                res = self.app.post_json('/v1', {
+                    "cmd": "request.post",
+                    "url": site_url,
+                    "postData": post_data
+                })
+                asset_cloudflare_solution(self, res, site_url, site_text)
+if __name__ == '__main__':
+    unittest.main()

flaresolverr/undetected_chromedriver/__init__.py ADDED Viewed

	@@ -0,0 +1,910 @@

+#!/usr/bin/env python3
+"""
+         888                                                  888         d8b
+         888                                                  888         Y8P
+         888                                                  888
+ .d8888b 88888b.  888d888 .d88b.  88888b.d88b.   .d88b.   .d88888 888d888 888 888  888  .d88b.  888d888
+d88P"    888 "88b 888P"  d88""88b 888 "888 "88b d8P  Y8b d88" 888 888P"   888 888  888 d8P  Y8b 888P"
+888      888  888 888    888  888 888  888  888 88888888 888  888 888     888 Y88  88P 88888888 888
+Y88b.    888  888 888    Y88..88P 888  888  888 Y8b.     Y88b 888 888     888  Y8bd8P  Y8b.     888
+ "Y8888P 888  888 888     "Y88P"  888  888  888  "Y8888   "Y88888 888     888   Y88P    "Y8888  888   88888888
+by UltrafunkAmsterdam (https://github.com/ultrafunkamsterdam)
+"""
+from __future__ import annotations
+__version__ = "3.5.5"
+import json
+import logging
+import os
+import pathlib
+import re
+import shutil
+import subprocess
+import sys
+import tempfile
+import time
+from weakref import finalize
+import selenium.webdriver.chrome.service
+import selenium.webdriver.chrome.webdriver
+from selenium.webdriver.common.by import By
+import selenium.webdriver.chromium.service
+import selenium.webdriver.remote.command
+import selenium.webdriver.remote.webdriver
+from .cdp import CDP
+from .dprocess import start_detached
+from .options import ChromeOptions
+from .patcher import IS_POSIX
+from .patcher import Patcher
+from .reactor import Reactor
+from .webelement import UCWebElement
+from .webelement import WebElement
+# Names exported by a star-import of this package.
+__all__ = (
+    "Chrome",
+    "ChromeOptions",
+    "Patcher",
+    "Reactor",
+    "CDP",
+    "find_chrome_executable",
+)
+# Package logger named "uc"; its level is copied once, at import time, from the
+# root logger's effective level.
+logger = logging.getLogger("uc")
+logger.setLevel(logging.getLogger().getEffectiveLevel())
+class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
+    """
+    Controls the ChromeDriver and allows you to drive the browser.
+    The webdriver file will be downloaded by this module automatically,
+    you do not need to specify this. however, you may if you wish.
+    Attributes
+    ----------
+    Methods
+    -------
+    reconnect()
+        this can be useful in case of heavy detection methods
+        -stops the chromedriver service which runs in the background
+        -starts the chromedriver service which runs in the background
+        -recreate session
+    start_session(capabilities=None, browser_profile=None)
+        differentiates from the regular method in that it does not
+        require a capabilities argument. The capabilities are automatically
+        recreated from the options at creation time.
+    --------------------------------------------------------------------------
+        NOTE:
+            Chrome has everything included to work out of the box.
+            it does not `need` customizations.
+            any customizations MAY lead to trigger bot migitation systems.
+    --------------------------------------------------------------------------
+    """
+    _instances = set()
+    session_id = None
+    debug = False
+    def __init__(
+        self,
+        options=None,
+        user_data_dir=None,
+        driver_executable_path=None,
+        browser_executable_path=None,
+        port=0,
+        enable_cdp_events=False,
+        # service_args=None,
+        # service_creationflags=None,
+        desired_capabilities=None,
+        advanced_elements=False,
+        # service_log_path=None,
+        keep_alive=True,
+        log_level=0,
+        headless=False,
+        version_main=None,
+        patcher_force_close=False,
+        suppress_welcome=True,
+        use_subprocess=False,
+        debug=False,
+        no_sandbox=True,
+        windows_headless=False,
+        user_multi_procs: bool = False,
+        **kw,
+    ):
+        """
+        Creates a new instance of the chrome driver.
+        Starts the service and then creates new instance of chrome driver.
+        Parameters
+        ----------
+        options: ChromeOptions, optional, default: None - automatic useful defaults
+            this takes an instance of ChromeOptions, mainly to customize browser behavior.
+            anything other dan the default, for example extensions or startup options
+            are not supported in case of failure, and can probably lowers your undetectability.
+        user_data_dir: str , optional, default: None (creates temp profile)
+            if user_data_dir is a path to a valid chrome profile directory, use it,
+            and turn off automatic removal mechanism at exit.
+        driver_executable_path: str, optional, default: None(=downloads and patches new binary)
+        browser_executable_path: str, optional, default: None - use find_chrome_executable
+            Path to the browser executable.
+            If not specified, make sure the executable's folder is in $PATH
+        port: int, optional, default: 0
+            port to be used by the chromedriver executable, this is NOT the debugger port.
+            leave it at 0 unless you know what you are doing.
+            the default value of 0 automatically picks an available port.
+        enable_cdp_events: bool, default: False
+            :: currently for chrome only
+            this enables the handling of wire messages
+            when enabled, you can subscribe to CDP events by using:
+                driver.add_cdp_listener("Network.dataReceived", yourcallback)
+                # yourcallback is an callable which accepts exactly 1 dict as parameter
+        service_args: list of str, optional, default: None
+            arguments to pass to the driver service
+        desired_capabilities: dict, optional, default: None - auto from config
+            Dictionary object with non-browser specific capabilities only, such as "item" or "loggingPref".
+        advanced_elements:  bool, optional, default: False
+            makes it easier to recognize elements like you know them from html/browser inspection, especially when working
+            in an interactive environment
+            default webelement repr:
+            <selenium.webdriver.remote.webelement.WebElement (session="85ff0f671512fa535630e71ee951b1f2", element="6357cb55-92c3-4c0f-9416-b174f9c1b8c4")>
+            advanced webelement repr
+            <WebElement(<a class="mobile-show-inline-block mc-update-infos init-ok" href="#" id="main-cat-switcher-mobile">)>
+            note: when retrieving large amounts of elements ( example: find_elements_by_tag("*") ) and print them, it does take a little more time.
+        service_log_path: str, optional, default: None
+             path to log information from the driver.
+        keep_alive: bool, optional, default: True
+             Whether to configure ChromeRemoteConnection to use HTTP keep-alive.
+        log_level: int, optional, default: adapts to python global log level
+        headless: bool, optional, default: False
+            can also be specified in the options instance.
+            Specify whether you want to use the browser in headless mode.
+            warning: this lowers undetectability and not fully supported.
+        version_main: int, optional, default: None (=auto)
+            if you, for god knows whatever reason, use
+            an older version of Chrome. You can specify it's full rounded version number
+            here. Example: 87 for all versions of 87
+        patcher_force_close: bool, optional, default: False
+            instructs the patcher to do whatever it can to access the chromedriver binary
+            if the file is locked, it will force shutdown all instances.
+            setting it is not recommended, unless you know the implications and think
+            you might need it.
+        suppress_welcome: bool, optional , default: True
+            a "welcome" alert might show up on *nix-like systems asking whether you want to set
+            chrome as your default browser, and if you want to send even more data to google.
+            now, in case you are nag-fetishist, or a diagnostics data feeder to google, you can set this to False.
+            Note: if you don't handle the nag screen in time, the browser loses it's connection and throws an Exception.
+        use_subprocess: bool, optional , default: True,
+            False (the default) makes sure Chrome will get it's own process (so no subprocess of chromedriver.exe or python
+                This fixes a LOT of issues, like multithreaded run, but mst importantly. shutting corectly after
+                program exits or using .quit()
+                you should be knowing what you're doing, and know how python works.
+              unfortunately, there  is always an edge case in which one would like to write an single script with the only contents being:
+              --start script--
+              import undetected_chromedriver as uc
+              d = uc.Chrome()
+              d.get('https://somesite/')
+              ---end script --
+              and will be greeted with an error, since the program exists before chrome has a change to launch.
+              in that case you can set this to `True`. The browser will start via subprocess, and will keep running most of times.
+              ! setting it to True comes with NO support when being detected. !
+        no_sandbox: bool, optional, default=True
+             uses the --no-sandbox option, and additionally does suppress the "unsecure option" status bar
+             this option has a default of True since many people seem to run this as root (....) , and chrome does not start
+             when running as root without using --no-sandbox flag.
+        user_multi_procs:
+            set to true when you are using multithreads/multiprocessing
+            ensures not all processes are trying to modify a binary which is in use by another.
+            for this to work. YOU MUST HAVE AT LEAST 1 UNDETECTED_CHROMEDRIVER BINARY IN YOUR ROAMING DATA FOLDER.
+            this requirement can be easily satisfied, by just running this program "normal" and close/kill it.
+        """
+        finalize(self, self._ensure_close, self)
+        self.debug = debug
+        self.patcher = Patcher(
+            executable_path=driver_executable_path,
+            force=patcher_force_close,
+            version_main=version_main,
+            user_multi_procs=user_multi_procs,
+        )
+        # self.patcher.auto(user_multiprocess = user_multi_num_procs)
+        self.patcher.auto()
+        # self.patcher = patcher
+        if not options:
+            options = ChromeOptions()
+        try:
+            if hasattr(options, "_session") and options._session is not None:
+                #  prevent reuse of options,
+                #  as it just appends arguments, not replace them
+                #  you'll get conflicts starting chrome
+                raise RuntimeError("you cannot reuse the ChromeOptions object")
+        except AttributeError:
+            pass
+        options._session = self
+        if not options.debugger_address:
+            debug_port = (
+                port
+                if port != 0
+                else selenium.webdriver.common.service.utils.free_port()
+            )
+            debug_host = "127.0.0.1"
+            options.debugger_address = "%s:%d" % (debug_host, debug_port)
+        else:
+            debug_host, debug_port = options.debugger_address.split(":")
+            debug_port = int(debug_port)
+        if enable_cdp_events:
+            options.set_capability(
+                "goog:loggingPrefs", {"performance": "ALL", "browser": "ALL"}
+            )
+        options.add_argument("--remote-debugging-host=%s" % debug_host)
+        options.add_argument("--remote-debugging-port=%s" % debug_port)
+        if user_data_dir:
+            options.add_argument("--user-data-dir=%s" % user_data_dir)
+        language, keep_user_data_dir = None, bool(user_data_dir)
+        # see if a custom user profile is specified in options
+        for arg in options.arguments:
+            if any([_ in arg for _ in ("--headless", "headless")]):
+                options.arguments.remove(arg)
+                options.headless = True
+            if "lang" in arg:
+                m = re.search("(?:--)?lang(?:[ =])?(.*)", arg)
+                try:
+                    language = m[1]
+                except IndexError:
+                    logger.debug("will set the language to en-US,en;q=0.9")
+                    language = "en-US,en;q=0.9"
+            if "user-data-dir" in arg:
+                m = re.search("(?:--)?user-data-dir(?:[ =])?(.*)", arg)
+                try:
+                    user_data_dir = m[1]
+                    logger.debug(
+                        "user-data-dir found in user argument %s => %s" % (arg, m[1])
+                    )
+                    keep_user_data_dir = True
+                except IndexError:
+                    logger.debug(
+                        "no user data dir could be extracted from supplied argument %s "
+                        % arg
+                    )
+        if not user_data_dir:
+            # backward compatiblity
+            # check if an old uc.ChromeOptions is used, and extract the user data dir
+            if hasattr(options, "user_data_dir") and getattr(
+                options, "user_data_dir", None
+            ):
+                import warnings
+                warnings.warn(
+                    "using ChromeOptions.user_data_dir might stop working in future versions."
+                    "use uc.Chrome(user_data_dir='/xyz/some/data') in case you need existing profile folder"
+                )
+                options.add_argument("--user-data-dir=%s" % options.user_data_dir)
+                keep_user_data_dir = True
+                logger.debug(
+                    "user_data_dir property found in options object: %s" % user_data_dir
+                )
+            else:
+                user_data_dir = os.path.normpath(tempfile.mkdtemp())
+                keep_user_data_dir = False
+                arg = "--user-data-dir=%s" % user_data_dir
+                options.add_argument(arg)
+                logger.debug(
+                    "created a temporary folder in which the user-data (profile) will be stored during this\n"
+                    "session, and added it to chrome startup arguments: %s" % arg
+                )
+        if not language:
+            try:
+                import locale
+                language = locale.getdefaultlocale()[0].replace("_", "-")
+            except Exception:
+                pass
+            if not language:
+                language = "en-US"
+        options.add_argument("--lang=%s" % language)
+        if not options.binary_location:
+            options.binary_location = (
+                browser_executable_path or find_chrome_executable()
+            )
+        if not options.binary_location or not \
+                pathlib.Path(options.binary_location).exists():
+                raise FileNotFoundError(
+                    "\n---------------------\n"
+                    "Could not determine browser executable."
+                    "\n---------------------\n"
+                    "Make sure your browser is installed in the default location (path).\n"
+                    "If you are sure about the browser executable, you can specify it using\n"
+                    "the `browser_executable_path='{}` parameter.\n\n"
+                    .format("/path/to/browser/executable" if IS_POSIX else "c:/path/to/your/browser.exe")
+                )
+        self._delay = 3
+        self.user_data_dir = user_data_dir
+        self.keep_user_data_dir = keep_user_data_dir
+        if suppress_welcome:
+            options.arguments.extend(["--no-default-browser-check", "--no-first-run"])
+        if no_sandbox:
+            options.arguments.extend(["--no-sandbox", "--test-type"])
+        if headless or getattr(options, 'headless', None):
+            #workaround until a better checking is found
+            try:
+                v_main = int(self.patcher.version_main) if self.patcher.version_main else 108
+                if v_main < 108:
+                    options.add_argument("--headless=chrome")
+                elif v_main >= 108:
+                    options.add_argument("--headless=new")
+            except:
+                logger.warning("could not detect version_main."
+                               "therefore, we are assuming it is chrome 108 or higher")
+                options.add_argument("--headless=new")
+        options.add_argument("--window-size=1920,1080")
+        options.add_argument("--start-maximized")
+        options.add_argument("--no-sandbox")
+        # fixes "could not connect to chrome" error when running
+        # on linux using privileged user like root (which i don't recommend)
+        options.add_argument(
+            "--log-level=%d" % log_level
+            or divmod(logging.getLogger().getEffectiveLevel(), 10)[0]
+        )
+        if hasattr(options, "handle_prefs"):
+            options.handle_prefs(user_data_dir)
+        # fix exit_type flag to prevent tab-restore nag
+        try:
+            with open(
+                os.path.join(user_data_dir, "Default/Preferences"),
+                encoding="latin1",
+                mode="r+",
+            ) as fs:
+                config = json.load(fs)
+                if config["profile"]["exit_type"] is not None:
+                    # fixing the restore-tabs-nag
+                    config["profile"]["exit_type"] = None
+                fs.seek(0, 0)
+                json.dump(config, fs)
+                fs.truncate()  # the file might be shorter
+                logger.debug("fixed exit_type flag")
+        except Exception as e:
+            logger.debug("did not find a bad exit_type flag ")
+        self.options = options
+        if not desired_capabilities:
+            desired_capabilities = options.to_capabilities()
+        if not use_subprocess and not windows_headless:
+            self.browser_pid = start_detached(
+                options.binary_location, *options.arguments
+            )
+        else:
+            startupinfo = None
+            if os.name == 'nt' and windows_headless:
+                # STARTUPINFO() is Windows only
+                startupinfo = subprocess.STARTUPINFO()
+                startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
+            browser = subprocess.Popen(
+                [options.binary_location, *options.arguments],
+                stdin=subprocess.PIPE,
+                stdout=subprocess.PIPE,
+                stderr=subprocess.PIPE,
+                close_fds=IS_POSIX,
+                startupinfo=startupinfo
+            )
+            self.browser_pid = browser.pid
+        service = selenium.webdriver.chromium.service.ChromiumService(
+            self.patcher.executable_path
+        )
+        super().__init__(
+            service=service,
+            options=options,
+            keep_alive=keep_alive,
+        )
+        self.reactor = None
+        if enable_cdp_events:
+            if logging.getLogger().getEffectiveLevel() == logging.DEBUG:
+                logging.getLogger(
+                    "selenium.webdriver.remote.remote_connection"
+                ).setLevel(20)
+            reactor = Reactor(self)
+            reactor.start()
+            self.reactor = reactor
+        if advanced_elements:
+            self._web_element_cls = UCWebElement
+        else:
+            self._web_element_cls = WebElement
+        if headless or getattr(options, 'headless', None):
+            self._configure_headless()
+    def _configure_headless(self):
+        """
+        Replace ``self.get`` with a wrapper that applies headless evasions.
+        Before every navigation the wrapper evaluates ``navigator.webdriver``.
+        When that is truthy it registers scripts via
+        ``Page.addScriptToEvaluateOnNewDocument``, overrides the user agent with
+        the text "Headless" removed, and only then calls the original ``get``.
+        """
+        # keep a reference to the current get, the wrapper delegates to it
+        orig_get = self.get
+        logger.info("setting properties for headless")
+        def get_wrapped(*args, **kwargs):
+            # patches are (re)applied only while the page still reports a truthy navigator.webdriver
+            if self.execute_script("return navigator.webdriver"):
+                logger.info("patch navigator.webdriver")
+                # script 1: proxy window.navigator so that `webdriver` reads as false
+                self.execute_cdp_cmd(
+                    "Page.addScriptToEvaluateOnNewDocument",
+                    {
+                        "source": """
+                                Object.defineProperty(window, "navigator", {
+                                  value: new Proxy(navigator, {
+                                    has: (target, key) => (key === "webdriver" ? false : key in target),
+                                    get: (target, key) =>
+                                      key === "webdriver"
+                                        ? false
+                                        : typeof target[key] === "function"
+                                        ? target[key].bind(target)
+                                        : target[key],
+                                  }),
+                                });
+                    """
+                    },
+                )
+                logger.info("patch user-agent string")
+                # reuse the browser's own user agent, minus the "Headless" marker
+                self.execute_cdp_cmd(
+                    "Network.setUserAgentOverride",
+                    {
+                        "userAgent": self.execute_script(
+                            "return navigator.userAgent"
+                        ).replace("Headless", "")
+                    },
+                )
+                # script 2: maxTouchPoints / connection.rtt values, a window.chrome stub,
+                # a permissions.query override and a Function.prototype.toString override
+                self.execute_cdp_cmd(
+                    "Page.addScriptToEvaluateOnNewDocument",
+                    {
+                        "source": """
+                            Object.defineProperty(navigator, 'maxTouchPoints', {get: () => 1});
+                            Object.defineProperty(navigator.connection, 'rtt', {get: () => 100});
+                            // https://github.com/microlinkhq/browserless/blob/master/packages/goto/src/evasions/chrome-runtime.js
+                            window.chrome = {
+                                app: {
+                                    isInstalled: false,
+                                    InstallState: {
+                                        DISABLED: 'disabled',
+                                        INSTALLED: 'installed',
+                                        NOT_INSTALLED: 'not_installed'
+                                    },
+                                    RunningState: {
+                                        CANNOT_RUN: 'cannot_run',
+                                        READY_TO_RUN: 'ready_to_run',
+                                        RUNNING: 'running'
+                                    }
+                                },
+                                runtime: {
+                                    OnInstalledReason: {
+                                        CHROME_UPDATE: 'chrome_update',
+                                        INSTALL: 'install',
+                                        SHARED_MODULE_UPDATE: 'shared_module_update',
+                                        UPDATE: 'update'
+                                    },
+                                    OnRestartRequiredReason: {
+                                        APP_UPDATE: 'app_update',
+                                        OS_UPDATE: 'os_update',
+                                        PERIODIC: 'periodic'
+                                    },
+                                    PlatformArch: {
+                                        ARM: 'arm',
+                                        ARM64: 'arm64',
+                                        MIPS: 'mips',
+                                        MIPS64: 'mips64',
+                                        X86_32: 'x86-32',
+                                        X86_64: 'x86-64'
+                                    },
+                                    PlatformNaclArch: {
+                                        ARM: 'arm',
+                                        MIPS: 'mips',
+                                        MIPS64: 'mips64',
+                                        X86_32: 'x86-32',
+                                        X86_64: 'x86-64'
+                                    },
+                                    PlatformOs: {
+                                        ANDROID: 'android',
+                                        CROS: 'cros',
+                                        LINUX: 'linux',
+                                        MAC: 'mac',
+                                        OPENBSD: 'openbsd',
+                                        WIN: 'win'
+                                    },
+                                    RequestUpdateCheckStatus: {
+                                        NO_UPDATE: 'no_update',
+                                        THROTTLED: 'throttled',
+                                        UPDATE_AVAILABLE: 'update_available'
+                                    }
+                                }
+                            }
+                            // https://github.com/microlinkhq/browserless/blob/master/packages/goto/src/evasions/navigator-permissions.js
+                            if (!window.Notification) {
+                                window.Notification = {
+                                    permission: 'denied'
+                                }
+                            }
+                            const originalQuery = window.navigator.permissions.query
+                            window.navigator.permissions.__proto__.query = parameters =>
+                                parameters.name === 'notifications'
+                                    ? Promise.resolve({ state: window.Notification.permission })
+                                    : originalQuery(parameters)
+                            const oldCall = Function.prototype.call
+                            function call() {
+                                return oldCall.apply(this, arguments)
+                            }
+                            Function.prototype.call = call
+                            const nativeToStringFunctionString = Error.toString().replace(/Error/g, 'toString')
+                            const oldToString = Function.prototype.toString
+                            function functionToString() {
+                                if (this === window.navigator.permissions.query) {
+                                    return 'function query() { [native code] }'
+                                }
+                                if (this === functionToString) {
+                                    return nativeToStringFunctionString
+                                }
+                                return oldCall.call(oldToString, this)
+                            }
+                            // eslint-disable-next-line
+                            Function.prototype.toString = functionToString
+                            """
+                    },
+                )
+            return orig_get(*args, **kwargs)
+        # the instance attribute shadows the class-level get from now on
+        self.get = get_wrapped
+    # def _get_cdc_props(self):
+    #     return self.execute_script(
+    #         """
+    #         let objectToInspect = window,
+    #             result = [];
+    #         while(objectToInspect !== null)
+    #         { result = result.concat(Object.getOwnPropertyNames(objectToInspect));
+    #           objectToInspect = Object.getPrototypeOf(objectToInspect); }
+    #
+    #         return result.filter(i => i.match(/^([a-zA-Z]){27}(Array|Promise|Symbol)$/ig))
+    #         """
+    #     )
+    #
+    # def _hook_remove_cdc_props(self):
+    #     self.execute_cdp_cmd(
+    #         "Page.addScriptToEvaluateOnNewDocument",
+    #         {
+    #             "source": """
+    #                 let objectToInspect = window,
+    #                     result = [];
+    #                 while(objectToInspect !== null)
+    #                 { result = result.concat(Object.getOwnPropertyNames(objectToInspect));
+    #                   objectToInspect = Object.getPrototypeOf(objectToInspect); }
+    #                 result.forEach(p => p.match(/^([a-zA-Z]){27}(Array|Promise|Symbol)$/ig)
+    #                                     &&delete window[p]&&console.log('removed',p))
+    #                 """
+    #         },
+    #     )
+    def get(self, url):
+        """
+        Navigate to ``url`` by delegating to the parent class ``get``.
+        """
+        # the cdc-property removal hook below is currently disabled
+        # if self._get_cdc_props():
+        #     self._hook_remove_cdc_props()
+        return super().get(url)
+    def add_cdp_listener(self, event_name, callback):
+        if (
+            self.reactor
+            and self.reactor is not None
+            and isinstance(self.reactor, Reactor)
+        ):
+            self.reactor.add_event_handler(event_name, callback)
+            return self.reactor.handlers
+        return False
+    def clear_cdp_listeners(self):
+        if self.reactor and isinstance(self.reactor, Reactor):
+            self.reactor.handlers.clear()
+    def window_new(self):
+        self.execute(
+            selenium.webdriver.remote.command.Command.NEW_WINDOW, {"type": "window"}
+        )
+    def tab_new(self, url: str):
+        """
+        this opens a url in a new tab.
+        apparently, that passes all tests directly!
+        Parameters
+        ----------
+        url
+        Returns
+        -------
+        """
+        if not hasattr(self, "cdp"):
+            from .cdp import CDP
+            cdp = CDP(self.options)
+            cdp.tab_new(url)
+    def reconnect(self, timeout=0.1):
+        try:
+            self.service.stop()
+        except Exception as e:
+            logger.debug(e)
+        time.sleep(timeout)
+        try:
+            self.service.start()
+        except Exception as e:
+            logger.debug(e)
+        try:
+            self.start_session()
+        except Exception as e:
+            logger.debug(e)
+    def start_session(self, capabilities=None, browser_profile=None):
+        if not capabilities:
+            capabilities = self.options.to_capabilities()
+        super().start_session(capabilities)
+        # super(Chrome, self).start_session(capabilities, browser_profile) # Original explicit call commented out
+    def find_elements_recursive(self, by, value):
+        """
+        find elements in all frames
+        this is a generator function, which is needed
+            since if it would return a list of elements, they
+            will be stale on arrival.
+        using generator, when the element is returned we are in the correct frame
+        to use it directly
+        Args:
+            by: By
+            value: str
+        Returns: Generator[webelement.WebElement]
+        """
+        def search_frame(f=None):
+            if not f:
+                # ensure we are on main content frame
+                self.switch_to.default_content()
+            else:
+                self.switch_to.frame(f)
+            for elem in self.find_elements(by, value):
+                yield elem
+            # switch back to main content, otherwise we will get StaleElementReferenceException
+            self.switch_to.default_content()
+        # search root frame
+        for elem in search_frame():
+            yield elem
+        # get iframes
+        frames = self.find_elements('css selector', 'iframe')
+        # search per frame
+        for f in frames:
+            for elem in search_frame(f):
+                yield elem
+    def quit(self):
+        """
+        Shut down the driver service, the CDP reactor and the browser process,
+        then remove the profile folder unless ``keep_user_data_dir`` is set.
+        Every step is best-effort: failures are swallowed or logged at debug level.
+        """
+        # 1. stop and kill the chromedriver service process
+        try:
+            self.service.stop()
+            self.service.process.kill()
+            self.command_executor.close()
+            self.service.process.wait(5)
+            logger.debug("webdriver process ended")
+        except (AttributeError, RuntimeError, OSError):
+            pass
+        # 2. signal the reactor (if any) through its event
+        try:
+            self.reactor.event.set()
+            logger.debug("shutting down reactor")
+        except AttributeError:
+            pass
+        # 3. send signal 15 (SIGTERM on POSIX) to the browser process
+        try:
+            os.kill(self.browser_pid, 15)
+            logger.debug("gracefully closed browser")
+        except Exception as e:  # noqa
+            pass
+        # 4. remove the profile folder, only when it is not marked to be kept
+        if (
+            hasattr(self, "keep_user_data_dir")
+            and hasattr(self, "user_data_dir")
+            and not self.keep_user_data_dir
+        ):
+            # up to 5 attempts with a 0.1 second pause after each failed one
+            for _ in range(5):
+                try:
+                    shutil.rmtree(self.user_data_dir, ignore_errors=False)
+                except FileNotFoundError:
+                    pass
+                except (RuntimeError, OSError, PermissionError) as e:
+                    logger.debug(
+                        "When removing the temp profile, a %s occured: %s\nretrying..."
+                        % (e.__class__.__name__, e)
+                    )
+                else:
+                    logger.debug("successfully removed %s" % self.user_data_dir)
+                    break
+                try:
+                    time.sleep(0.1)
+                except OSError:
+                    pass
+        # dereference patcher, so patcher can start cleaning up as well.
+        # this must come last, otherwise it will throw 'in use' errors
+        self.patcher = None
+    def __getattribute__(self, item):
+        if not super().__getattribute__("debug"):
+            return super().__getattribute__(item)
+        else:
+            import inspect
+            original = super().__getattribute__(item)
+            if inspect.ismethod(original) and not inspect.isclass(original):
+                def newfunc(*args, **kwargs):
+                    logger.debug(
+                        "calling %s with args %s and kwargs %s\n"
+                        % (original.__qualname__, args, kwargs)
+                    )
+                    return original(*args, **kwargs)
+                return newfunc
+            return original
+    def __enter__(self):
+        """Context manager entry: returns the driver itself."""
+        return self
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        """
+        Context manager exit: stop the service, sleep ``self._delay`` seconds,
+        then start the service and a new session again.
+        NOTE(review): this restarts the driver instead of quitting it - confirm this is intended.
+        """
+        self.service.stop()
+        time.sleep(self._delay)
+        self.service.start()
+        self.start_session()
+    def __hash__(self):
+        """Hash of ``self.options.debugger_address``."""
+        return hash(self.options.debugger_address)
+    def __dir__(self):
+        """Return the attribute listing produced by ``object.__dir__``."""
+        return object.__dir__(self)
+    def __del__(self):
+        try:
+            self.service.process.kill()
+        except:  # noqa
+            pass
+        self.quit()
+    @classmethod
+    def _ensure_close(cls, self):
+        # needs to be a classmethod so finalize can find the reference
+        logger.info("ensuring close")
+        if (
+            hasattr(self, "service")
+            and hasattr(self.service, "process")
+            and hasattr(self.service.process, "kill")
+        ):
+            self.service.process.kill()
+def find_chrome_executable():
+    """
+    Finds the chrome, chrome beta, chrome canary, chromium executable
+    Returns
+    -------
+    executable_path :  str
+        the full file path to found executable
+    """
+    candidates = set()
+    if IS_POSIX:
+        for item in os.environ.get("PATH").split(os.pathsep):
+            for subitem in (
+                "google-chrome",
+                "chromium",
+                "chromium-browser",
+                "chrome",
+                "google-chrome-stable",
+            ):
+                candidates.add(os.sep.join((item, subitem)))
+        if "darwin" in sys.platform:
+            candidates.update(
+                [
+                    "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
+                    "/Applications/Chromium.app/Contents/MacOS/Chromium",
+                ]
+            )
+    else:
+        for item in map(
+            os.environ.get,
+            ("PROGRAMFILES", "PROGRAMFILES(X86)", "LOCALAPPDATA", "PROGRAMW6432"),
+        ):
+            if item is not None:
+                for subitem in (
+                    "Google/Chrome/Application",
+                ):
+                    candidates.add(os.sep.join((item, subitem, "chrome.exe")))
+    for candidate in candidates:
+        logger.debug('checking if %s exists and is executable' % candidate)
+        if os.path.exists(candidate) and os.access(candidate, os.X_OK):
+            logger.debug('found! using %s' % candidate)
+            return os.path.normpath(candidate)

flaresolverr/undetected_chromedriver/cdp.py ADDED Viewed

	@@ -0,0 +1,112 @@

+#!/usr/bin/env python3
+# this module is part of undetected_chromedriver
+import json
+import logging
+import requests
+import websockets
+log = logging.getLogger(__name__)
+class CDPObject(dict):
+    def __init__(self, *a, **k):
+        super().__init__(*a, **k)
+        self.__dict__ = self
+        for k in self.__dict__:
+            if isinstance(self.__dict__[k], dict):
+                self.__dict__[k] = CDPObject(self.__dict__[k])
+            elif isinstance(self.__dict__[k], list):
+                for i in range(len(self.__dict__[k])):
+                    if isinstance(self.__dict__[k][i], dict):
+                        self.__dict__[k][i] = CDPObject(self)
+    def __repr__(self):
+        tpl = f"{self.__class__.__name__}(\n\t{{}}\n\t)"
+        return tpl.format("\n  ".join(f"{k} = {v}" for k, v in self.items()))
+class PageElement(CDPObject):
+    """CDPObject subclass without additions of its own."""
+    pass
class CDP:
    """
    Small helper around the HTTP ``/json`` endpoints and the websocket url
    exposed at ``options.debugger_address``.

    The raw response and the decoded JSON of the most recent call are kept
    in ``_last_resp`` / ``_last_json``.
    """

    log = logging.getLogger("CDP")

    endpoints = CDPObject(
        {
            "json": "/json",
            "protocol": "/json/protocol",
            "list": "/json/list",
            "new": "/json/new?{url}",
            "activate": "/json/activate/{id}",
            "close": "/json/close/{id}",
        }
    )

    def __init__(self, options: "ChromeOptions"):  # noqa
        """
        :param options: object providing ``debugger_address`` as "host:port"
        """
        self.server_addr = "http://{0}:{1}".format(*options.debugger_address.split(":"))

        self._reqid = 0
        self._session = requests.Session()
        self._last_resp = None
        self._last_json = None

        # the first entry of the /json listing provides the session id and
        # the websocket url used by send()
        resp = self.get(self.endpoints.json)  # noqa
        self.sessionId = resp[0]["id"]
        self.wsurl = resp[0]["webSocketDebuggerUrl"]

    def tab_activate(self, id=None):
        """
        Activate the tab with the given id.

        Without an id the first tab of tab_list() is used, and ``wsurl`` is
        switched to that tab's websocket url.
        """
        if not id:
            active_tab = self.tab_list()[0]
            id = active_tab.id  # noqa
            self.wsurl = active_tab.webSocketDebuggerUrl  # noqa
        return self.post(self.endpoints["activate"].format(id=id))

    def tab_list(self):
        """Return the entries of the ``/json/list`` endpoint as PageElement objects."""
        retval = self.get(self.endpoints["list"])
        return [PageElement(o) for o in retval]

    def tab_new(self, url):
        """Request a new tab for ``url`` and return the result of post()."""
        return self.post(self.endpoints["new"].format(url=url))

    def tab_close_last_opened(self):
        """Close the last entry of tab_list() whose type is "page"."""
        sessions = self.tab_list()
        opentabs = [s for s in sessions if s["type"] == "page"]
        return self.post(self.endpoints["close"].format(id=opentabs[-1]["id"]))

    async def send(self, method: str, params: dict):
        """
        Send one message over a fresh websocket connection to ``wsurl``.

        The first reply is stored in ``_last_resp`` / ``_last_json`` and
        logged; nothing is returned.
        """
        self._reqid += 1
        async with websockets.connect(self.wsurl) as ws:
            await ws.send(
                json.dumps({"method": method, "params": params, "id": self._reqid})
            )
            self._last_resp = await ws.recv()
            self._last_json = json.loads(self._last_resp)
            self.log.info(self._last_json)

    def get(self, uri):
        """
        GET ``server_addr + uri``.

        :return: the decoded JSON body, or None when the body is not JSON
        """
        resp = self._session.get(self.server_addr + uri)
        try:
            self._last_resp = resp
            self._last_json = resp.json()
        except Exception:
            return
        else:
            return self._last_json

    def post(self, uri, data: dict = None):
        """
        POST ``data`` (an empty dict when omitted) as JSON to ``server_addr + uri``.

        :return: the decoded JSON body, or the raw response object when the
                 body is not JSON
        """
        if not data:
            data = {}
        resp = self._session.post(self.server_addr + uri, json=data)
        try:
            self._last_resp = resp
            self._last_json = resp.json()
        except Exception:
            return self._last_resp
        else:
            # mirror get(): hand the parsed body back instead of None
            return self._last_json

    @property
    def last_json(self):
        """Decoded JSON of the most recent successful call."""
        return self._last_json

flaresolverr/undetected_chromedriver/devtool.py ADDED Viewed

	@@ -0,0 +1,193 @@

+import asyncio
+from collections.abc import Mapping
+from collections.abc import Sequence
+from functools import wraps
+import os
+import logging
+import threading
+import time
+import traceback
+from typing import Any
+from typing import Awaitable
+from typing import Callable
+from typing import List
+from typing import Optional
class Structure(dict):
    """
    This is a dict-like object structure, which you should subclass.

    Keys are available both as items and as attributes. Mapping values are
    converted to the same class recursively; so are mappings found directly
    inside sequence values (the sequence itself is stored as a list).
    """

    _store = {}

    def __init__(self, *a, **kw):
        """
        Instantiate a new instance.

        :param a: positional arguments as accepted by dict()
        :param kw: keyword arguments as accepted by dict()
        """
        super().__init__()

        # auxiliary dict holding the raw input
        initial = dict(*a, **kw)
        for key, value in initial.items():
            if isinstance(value, Mapping):
                self[key] = self.__class__(value)
            elif isinstance(value, Sequence) and not isinstance(
                value, (str, bytes, bytearray)
            ):
                # only mappings are wrapped; plain items (numbers, strings,
                # ...) are kept as they are instead of being fed to dict()
                self[key] = [
                    self.__class__(element) if isinstance(element, Mapping) else element
                    for element in value
                ]
            else:
                self[key] = value
        # from here on attribute storage and item storage are the same object
        super().__setattr__("__dict__", self)

    def __getattr__(self, item):
        # only reached when normal attribute lookup failed
        return getattr(super(), item)

    def __getitem__(self, item):
        return super().__getitem__(item)

    def __setattr__(self, key, value):
        self.__setitem__(key, value)

    def __setitem__(self, key, value):
        super().__setitem__(key, value)

    def update(self, *a, **kw):
        super().update(*a, **kw)

    def __eq__(self, other):
        if not isinstance(other, Mapping):
            return NotImplemented
        # plain dict comparison also works for unhashable values (lists)
        return dict(self) == dict(other)

    def __hash__(self):
        # raises TypeError when a value is unhashable (e.g. a list)
        return hash(frozenset(self.items()))

    @classmethod
    def __init_subclass__(cls, **kwargs):
        # every subclass gets its own store
        cls._store = {}

    def _normalize_strings(self):
        """Strip surrounding whitespace from all top-level string values."""
        for key, value in self.copy().items():
            if isinstance(value, str):
                self[key] = value.strip()
def timeout(seconds=3, on_timeout: Optional[Callable[[callable], Any]] = None):
    """
    Decorator factory which arms a ``threading.Timer`` around every call.

    When the timer expires before the decorated function has finished,
    ``on_timeout(func)`` is called; without ``on_timeout`` a TimeoutError is
    raised. Either happens on the timer's own thread, the running call is
    not interrupted. The timer is cancelled as soon as the call returns or
    raises.

    :param seconds: timer interval
    :param on_timeout: optional callback receiving the decorated function
    """

    def wrapper(func):
        def _expired():
            if not on_timeout:
                raise TimeoutError("function call timed out")
            on_timeout(func)

        @wraps(func)
        def wrapped(*args, **kwargs):
            timer = threading.Timer(interval=seconds, function=_expired)
            timer.start()
            try:
                return func(*args, **kwargs)
            finally:
                # covers both the normal and the exceptional exit
                timer.cancel()

        return wrapped

    return wrapper
def test():
    """
    Manual smoke test: starts Chrome through undetected_chromedriver, prints
    every driver request and every collected log entry, then shuts down.

    Needs a local Chrome installation and network access.
    """
    import sys

    sys.path.insert(0, os.path.abspath(os.path.dirname(__file__)))

    import undetected_chromedriver as uc

    def debug_enabled():
        # 10 == logging.DEBUG
        return logging.getLogger().getEffectiveLevel() <= 10

    def collector(
        driver: uc.Chrome,
        stop_event: threading.Event,
        on_event_coro: Optional[Callable[[List[str]], Awaitable[Any]]] = None,
        listen_events: Sequence = ("browser", "network", "performance"),
    ):
        """Poll the driver logs on a background thread until stop_event is set."""

        def threaded(driver, stop_event, on_event_coro):
            async def _ensure_service_started():
                while (
                    getattr(driver, "service", False)
                    and getattr(driver.service, "process", False)
                    and driver.service.process.poll()
                ):
                    print("waiting for driver service to come back on")
                    await asyncio.sleep(0.05)

            async def get_log_lines(typ):
                await _ensure_service_started()
                return driver.get_log(typ)

            async def looper():
                while not stop_event.is_set():
                    log_lines = []
                    try:
                        for log_type in listen_events:
                            try:
                                log_lines += await get_log_lines(log_type)
                            except Exception:
                                # a failing log type must not stop the others
                                if debug_enabled():
                                    traceback.print_exc()
                                continue
                        if log_lines and on_event_coro:
                            await on_event_coro(log_lines)
                    except Exception:
                        if debug_enabled():
                            traceback.print_exc()

            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)
            try:
                loop.run_until_complete(looper())
            finally:
                loop.close()

        t = threading.Thread(target=threaded, args=(driver, stop_event, on_event_coro))
        t.start()

    async def on_event(data):
        print("on_event")
        print("data:", data)

    def func_called(fn):
        def wrapped(*args, **kwargs):
            print(
                "func called! %s  (args: %s, kwargs: %s)" % (fn.__name__, args, kwargs)
            )
            while driver.service.process and driver.service.process.poll() is not None:
                time.sleep(0.1)
            res = fn(*args, **kwargs)
            print("func completed! (result: %s)" % res)
            return res

        return wrapped

    logging.basicConfig(level=10)

    options = uc.ChromeOptions()
    options.set_capability(
        "goog:loggingPrefs", {"performance": "ALL", "browser": "ALL", "network": "ALL"}
    )

    driver = uc.Chrome(version_main=96, options=options)

    driver.command_executor._request = func_called(driver.command_executor._request)
    collector_stop = threading.Event()
    collector(driver, collector_stop, on_event)

    try:
        driver.get("https://nowsecure.nl")
        time.sleep(10)
    finally:
        # without this the (non-daemon) collector thread never leaves its loop
        collector_stop.set()
        if os.name == "nt":
            driver.close()
        driver.quit()

flaresolverr/undetected_chromedriver/dprocess.py ADDED Viewed

	@@ -0,0 +1,77 @@

+import atexit
+import logging
+import multiprocessing
+import os
+import platform
+import signal
+from subprocess import PIPE
+from subprocess import Popen
+import sys
# flags combined into Popen(creationflags=...) when running on Windows
CREATE_NEW_PROCESS_GROUP = 0x00000200
DETACHED_PROCESS = 0x00000008

# pids returned by start_detached(); _cleanup() signals each of them at exit
REGISTERED = []
def start_detached(executable, *args):
    """
    Starts a fully independent subprocess (with no parent)

    A short-lived helper process launches the executable and reports the
    pid of that grandchild through a pipe.

    :param executable: executable
    :param args: arguments to the executable, eg: ['--param1_key=param1_val', '-vvv' ...]
    :return: pid of the grandchild process
    :raises RuntimeError: when the helper process ended without reporting a pid
    """
    # one-way pipe: the helper writes, this process reads
    reader, writer = multiprocessing.Pipe(False)

    process = multiprocessing.Process(
        target=_start_detached,
        args=(executable, *args),
        kwargs={"writer": writer},
        daemon=True,
    )
    try:
        process.start()
        process.join()
        # Drop our copy of the write end before reading. If the helper died
        # without sending anything, recv() now raises EOFError instead of
        # blocking forever.
        writer.close()
        try:
            pid = reader.recv()
        except EOFError as exc:
            raise RuntimeError(
                "could not start detached process: %s" % executable
            ) from exc
    finally:
        writer.close()
        reader.close()
        process.close()

    REGISTERED.append(pid)
    return pid
def _start_detached(executable, *args, writer: multiprocessing.Pipe = None):
    """
    Helper-process entry point used by start_detached().

    Launches ``executable`` with ``args``, sends the new pid through
    ``writer`` and exits the current process.

    :param executable: executable
    :param args: arguments to the executable
    :param writer: object whose ``send()`` receives the pid
    """
    # configure launch
    launch_options = {}
    if platform.system() == "Windows":
        launch_options.update(
            creationflags=DETACHED_PROCESS | CREATE_NEW_PROCESS_GROUP
        )
    else:
        # not Windows: run the new process in a session of its own
        launch_options.update(start_new_session=True)

    # run
    p = Popen(
        [executable, *args], stdin=PIPE, stdout=PIPE, stderr=PIPE, **launch_options
    )

    # send pid to pipe
    writer.send(p.pid)
    sys.exit()
def _cleanup():
    """Send SIGTERM to every pid in REGISTERED; failures are only logged."""
    log = logging.getLogger(__name__)
    for pid in REGISTERED:
        try:
            log.debug("cleaning up pid %d " % pid)
            os.kill(pid, signal.SIGTERM)
        except Exception as exc:
            # best effort (the process may be gone already), but do not
            # swallow KeyboardInterrupt / SystemExit
            log.debug("could not terminate pid %s: %s", pid, exc)


atexit.register(_cleanup)

flaresolverr/undetected_chromedriver/options.py ADDED Viewed

	@@ -0,0 +1,85 @@

+#!/usr/bin/env python3
+# this module is part of undetected_chromedriver
+import json
+import os
+from selenium.webdriver.chromium.options import ChromiumOptions as _ChromiumOptions
class ChromeOptions(_ChromiumOptions):
    """ChromiumOptions with a user-data-dir property and preference-file handling."""

    _session = None
    _user_data_dir = None

    @property
    def user_data_dir(self):
        return self._user_data_dir

    @user_data_dir.setter
    def user_data_dir(self, path: str):
        """
        Sets the browser profile folder to use, or creates a new profile
        at given <path>.

        Parameters
        ----------
        path: str
            the path to a chrome profile folder
            if it does not exist, a new profile will be created at given location
        """
        apath = os.path.abspath(path)
        self._user_data_dir = os.path.normpath(apath)

    @staticmethod
    def _undot_key(key, value):
        """turn a (dotted key, value) into a proper nested dict"""
        if "." in key:
            key, rest = key.split(".", 1)
            value = ChromeOptions._undot_key(rest, value)
        return {key: value}

    @staticmethod
    def _merge_nested(a, b):
        """
        merges b into a
        leaf values in a are overwritten with values from b
        """
        for key in b:
            if key in a:
                if isinstance(a[key], dict) and isinstance(b[key], dict):
                    ChromeOptions._merge_nested(a[key], b[key])
                    continue
            a[key] = b[key]
        return a

    def handle_prefs(self, user_data_dir):
        """
        Write the "prefs" experimental option into ``<dir>/Default/Preferences``.

        Dotted keys are expanded to nested dicts and merged over the content
        of an existing Preferences file. Afterwards "prefs" is removed from
        the experimental options. Nothing happens when no prefs are set.

        :param user_data_dir: profile folder; falls back to the value set
                              through the ``user_data_dir`` property
        """
        prefs = self.experimental_options.get("prefs")
        if not prefs:
            return

        user_data_dir = user_data_dir or self._user_data_dir
        default_path = os.path.join(user_data_dir, "Default")
        os.makedirs(default_path, exist_ok=True)

        # undot prefs dict keys
        undot_prefs = {}
        for key, value in prefs.items():
            undot_prefs = self._merge_nested(undot_prefs, self._undot_key(key, value))

        prefs_file = os.path.join(default_path, "Preferences")
        if os.path.exists(prefs_file):
            # decode as utf-8: reading the JSON as latin1 and dumping it
            # again would turn every non-ASCII character into mojibake
            with open(prefs_file, encoding="utf-8", mode="r") as f:
                undot_prefs = self._merge_nested(json.load(f), undot_prefs)

        with open(prefs_file, encoding="utf-8", mode="w") as f:
            json.dump(undot_prefs, f)

        # remove the experimental_options to avoid an error
        del self._experimental_options["prefs"]

    @classmethod
    def from_options(cls, options):
        """Create a new instance carrying a copy of ``options.__dict__``."""
        o = cls()
        o.__dict__.update(options.__dict__)
        return o

flaresolverr/undetected_chromedriver/patcher.py ADDED Viewed

	@@ -0,0 +1,473 @@

+#!/usr/bin/env python3
+# this module is part of undetected_chromedriver
+from packaging.version import Version as LooseVersion
+import io
+import json
+import logging
+import os
+import pathlib
+import platform
+import random
+import re
+import shutil
+import string
+import subprocess
+import sys
+import time
+from urllib.request import urlopen
+from urllib.request import urlretrieve
+import zipfile
+from multiprocessing import Lock
+logger = logging.getLogger(__name__)
+IS_POSIX = sys.platform.startswith(("darwin", "cygwin", "linux", "linux2", "freebsd"))
class Patcher(object):
    """
    Provides a chromedriver binary (downloaded, copied or user supplied) and
    patches it in place, see patch_exe().
    """

    lock = Lock()
    exe_name = "chromedriver%s"

    platform = sys.platform
    if platform.endswith("win32"):
        d = "~/appdata/roaming/undetected_chromedriver"
    elif "LAMBDA_TASK_ROOT" in os.environ:
        d = "/tmp/undetected_chromedriver"
    elif platform.startswith(("linux", "linux2")):
        d = "~/.local/share/undetected_chromedriver"
    elif platform.endswith("darwin"):
        d = "~/Library/Application Support/undetected_chromedriver"
    else:
        d = "~/.undetected_chromedriver"
    data_path = os.path.abspath(os.path.expanduser(d))

    def __init__(
        self,
        executable_path=None,
        force=False,
        version_main: int = 0,
        user_multi_procs=False,
    ):
        """
        Args:
            executable_path: None = automatic
                             a full file path to the chromedriver executable
            force: False
                    terminate processes which are holding lock
            version_main: 0 = auto
                specify main chrome version (rounded, ex: 82)
            user_multi_procs: False
                    when set, auto() first tries to reuse the most recent
                    binary found in data_path and __del__ keeps the binary
        """
        self.force = force
        self._custom_exe_path = False
        prefix = "undetected"
        self.user_multi_procs = user_multi_procs

        try:
            # Try to convert version_main into an integer
            version_main_int = int(version_main)
            # check if version_main_int is less than or equal to e.g 114
            self.is_old_chromedriver = version_main and version_main_int <= 114
        except (ValueError, TypeError):
            # Check not running inside Docker
            if not os.path.exists("/app/chromedriver"):
                # If the conversion fails, log an error message
                logging.info("version_main cannot be converted to an integer")
            # Set self.is_old_chromedriver to False if the conversion fails
            self.is_old_chromedriver = False

        # Needs to be called before self.exe_name is accessed
        self._set_platform_name()

        if not os.path.exists(self.data_path):
            os.makedirs(self.data_path, exist_ok=True)

        if not executable_path:
            if sys.platform.startswith("freebsd"):
                self.executable_path = os.path.join(self.data_path, self.exe_name)
            else:
                self.executable_path = os.path.join(
                    self.data_path, "_".join([prefix, self.exe_name])
                )

        if not IS_POSIX:
            if executable_path:
                if not executable_path[-4:] == ".exe":
                    executable_path += ".exe"

        self.zip_path = os.path.join(self.data_path, prefix)

        if not executable_path:
            if not self.user_multi_procs:
                self.executable_path = os.path.abspath(
                    os.path.join(".", self.executable_path)
                )

        if executable_path:
            self._custom_exe_path = True
            self.executable_path = executable_path

        # Set the correct repository to download the Chromedriver from
        if self.is_old_chromedriver:
            self.url_repo = "https://chromedriver.storage.googleapis.com"
        else:
            self.url_repo = "https://googlechromelabs.github.io/chrome-for-testing"

        self.version_main = version_main
        self.version_full = None

    def _set_platform_name(self):
        """
        Set the platform and exe name based on the platform undetected_chromedriver is running on
        in order to download the correct chromedriver.
        """
        if self.platform.endswith("win32"):
            self.platform_name = "win32"
            self.exe_name %= ".exe"
        if self.platform.endswith(("linux", "linux2")):
            self.platform_name = "linux64"
            self.exe_name %= ""
        if self.platform.endswith("darwin"):
            if self.is_old_chromedriver:
                self.platform_name = "mac64"
            else:
                self.platform_name = "mac-x64"
            self.exe_name %= ""
        if self.platform.startswith("freebsd"):
            self.platform_name = "freebsd"
            self.exe_name %= ""

    def auto(self, executable_path=None, force=False, version_main=None, _=None):
        """
        Make a patched chromedriver binary available at self.executable_path.

        Args:
            executable_path: use this binary instead of downloading one
            force: kill processes holding the binary when it cannot be replaced
            version_main: main chrome version to fetch the driver for

        Returns:
            True when a patched binary is in place; a falsy value otherwise
        """
        p = pathlib.Path(self.data_path)
        if self.user_multi_procs:
            # NOTE(review): a new Lock() is private to this call and excludes
            # nobody; self.lock looks like the intended lock. Left unchanged
            # because callers outside this module may already hold self.lock.
            with Lock():
                files = list(p.rglob("*chromedriver*"))
                if files:
                    most_recent = max(files, key=lambda f: f.stat().st_mtime)
                    files.remove(most_recent)
                    for stale in files:
                        stale.unlink()
                    if self.is_binary_patched(most_recent):
                        self.executable_path = str(most_recent)
                        return True

        if executable_path:
            self.executable_path = executable_path
            self._custom_exe_path = True

        if self._custom_exe_path:
            ispatched = self.is_binary_patched(self.executable_path)
            if not ispatched:
                return self.patch_exe()
            else:
                return

        if version_main:
            self.version_main = version_main
        if force is True:
            self.force = force

        if self.platform_name == "freebsd":
            chromedriver_path = shutil.which("chromedriver")

            # shutil.which() returns None when nothing is found on PATH
            if (
                not chromedriver_path
                or not os.path.isfile(chromedriver_path)
                or not os.access(chromedriver_path, os.X_OK)
            ):
                logging.error("Chromedriver not installed!")
                return

            version_path = os.path.join(
                os.path.dirname(self.executable_path), "version.txt"
            )

            process = os.popen(f'"{chromedriver_path}" --version')
            chromedriver_version = process.read().split(' ')[1].split(' ')[0]
            process.close()

            current_version = None
            if os.path.isfile(version_path) or os.access(version_path, os.X_OK):
                with open(version_path, 'r') as f:
                    current_version = f.read()

            # only copy again when the installed driver version changed
            if current_version != chromedriver_version:
                logging.info("Copying chromedriver executable...")
                shutil.copy(chromedriver_path, self.executable_path)
                os.chmod(self.executable_path, 0o755)

                with open(version_path, 'w') as f:
                    f.write(chromedriver_version)

                logging.info("Chromedriver executable copied!")
        else:
            try:
                os.unlink(self.executable_path)
            except PermissionError:
                if self.force:
                    self.force_kill_instances(self.executable_path)
                    return self.auto(force=not self.force)
                try:
                    if self.is_binary_patched():
                        # assumes already running AND patched
                        return True
                except PermissionError:
                    pass
            except FileNotFoundError:
                pass

            release = self.fetch_release_number()
            self.version_main = release.major
            self.version_full = release
            self.unzip_package(self.fetch_package())

        return self.patch()

    def driver_binary_in_use(self, path: str = None) -> bool:
        """
        naive test to check if a found chromedriver binary is
        currently in use

        Args:
            path: a string or PathLike object to the binary to check.
                  if not specified, we check use this object's executable_path

        Returns:
            True when seeking or reading raised PermissionError, False when
            both worked, None when the file could not be opened at all

        Raises:
            OSError: when the file does not exist
        """
        if not path:
            path = self.executable_path
        p = pathlib.Path(path)

        if not p.exists():
            raise OSError("file does not exist: %s" % p)
        try:
            with open(p, mode="a+b") as fs:
                exc = []
                try:
                    fs.seek(0, 0)
                except PermissionError as e:
                    exc.append(e)  # since some systems apparently allow seeking
                    # we conduct another test
                try:
                    fs.readline()
                except PermissionError as e:
                    exc.append(e)

                if exc:
                    return True
                return False
        except Exception:
            pass

    def cleanup_unused_files(self):
        """Delete every entry in data_path whose name contains "undetected" (best effort)."""
        p = pathlib.Path(self.data_path)
        for item in p.glob("*undetected*"):
            try:
                item.unlink()
            except OSError:
                # in use, a directory, or already gone
                pass

    def patch(self):
        """Patch the binary and report whether the patch marker is present."""
        self.patch_exe()
        return self.is_binary_patched()

    def fetch_release_number(self):
        """
        Gets the latest major version available, or the latest release of self.version_main if set explicitly.

        :return: version string
        :rtype: LooseVersion
        """
        # Endpoint for old versions of Chromedriver (114 and below)
        if self.is_old_chromedriver:
            path = f"/latest_release_{self.version_main}"
            path = path.upper()
            logger.debug("getting release number from %s" % path)
            with urlopen(self.url_repo + path) as conn:
                return LooseVersion(conn.read().decode())

        # Endpoint for new versions of Chromedriver (115+)
        if not self.version_main:
            # Fetch the latest version
            path = "/last-known-good-versions-with-downloads.json"
            logger.debug("getting release number from %s" % path)
            with urlopen(self.url_repo + path) as conn:
                response = conn.read().decode()

            last_versions = json.loads(response)
            return LooseVersion(last_versions["channels"]["Stable"]["version"])

        # Fetch the latest minor version of the major version provided
        path = "/latest-versions-per-milestone-with-downloads.json"
        logger.debug("getting release number from %s" % path)
        with urlopen(self.url_repo + path) as conn:
            response = conn.read().decode()

        major_versions = json.loads(response)
        return LooseVersion(major_versions["milestones"][str(self.version_main)]["version"])

    def parse_exe_version(self):
        """Return the version found inside the binary, or None when no marker matches."""
        with io.open(self.executable_path, "rb") as f:
            for line in iter(lambda: f.readline(), b""):
                match = re.search(rb"platform_handle\x00content\x00([0-9.]*)", line)
                if match:
                    return LooseVersion(match[1].decode())

    def fetch_package(self):
        """
        Downloads ChromeDriver from source

        :return: path to downloaded file
        """
        zip_name = f"chromedriver_{self.platform_name}.zip"
        if self.is_old_chromedriver:
            download_url = "%s/%s/%s" % (self.url_repo, str(self.version_full), zip_name)
        else:
            zip_name = zip_name.replace("_", "-", 1)
            download_url = "https://storage.googleapis.com/chrome-for-testing-public/%s/%s/%s"
            download_url %= (str(self.version_full), self.platform_name, zip_name)

        logger.debug("downloading from %s" % download_url)
        return urlretrieve(download_url)[0]

    def unzip_package(self, fp):
        """
        Extract the downloaded archive, move the driver binary to
        self.executable_path and remove the archive and extraction folder.

        :param fp: path to the downloaded zip file
        :return: path to unpacked executable
        """
        exe_path = self.exe_name
        if not self.is_old_chromedriver:
            # The new chromedriver unzips into its own folder
            zip_name = f"chromedriver-{self.platform_name}"
            exe_path = os.path.join(zip_name, self.exe_name)

        logger.debug("unzipping %s" % fp)
        try:
            os.unlink(self.zip_path)
        except (FileNotFoundError, OSError):
            pass

        os.makedirs(self.zip_path, mode=0o755, exist_ok=True)
        with zipfile.ZipFile(fp, mode="r") as zf:
            zf.extractall(self.zip_path)
        os.rename(os.path.join(self.zip_path, exe_path), self.executable_path)
        os.remove(fp)
        # drop what is left of the extracted archive
        shutil.rmtree(self.zip_path, ignore_errors=True)
        os.chmod(self.executable_path, 0o755)
        return self.executable_path

    @staticmethod
    def force_kill_instances(exe_name):
        """
        kills running instances.

        :param: executable name to kill, may be a path as well
        :return: True on success else False
        """
        exe_name = os.path.basename(exe_name)
        if IS_POSIX:
            try:
                # argument list instead of a shell string: the name is never
                # interpreted by a shell
                result = subprocess.run(
                    ["pidof", exe_name], capture_output=True, text=True, check=True
                )
                pids = result.stdout.strip().split()
                if pids:
                    subprocess.run(["kill", "-9"] + pids, check=False)
                    return True
                return False  # No PIDs found
            except subprocess.CalledProcessError:  # pidof returns 1 if no process found
                return False
            except Exception as e:
                logger.debug(f"Error killing process on POSIX: {e}")
                return False
        else:
            try:
                # TASKKILL /F /IM chromedriver.exe
                result = subprocess.run(
                    ["taskkill", "/f", "/im", exe_name], check=False, capture_output=True
                )
                # taskkill returns 0 if process was killed, 128 if not found.
                return result.returncode == 0
            except Exception as e:
                logger.debug(f"Error killing process on Windows: {e}")
                return False

    @staticmethod
    def gen_random_cdc():
        """Return 27 random ascii letters as bytes."""
        cdc = random.choices(string.ascii_letters, k=27)
        return "".join(cdc).encode()

    def is_binary_patched(self, executable_path=None):
        """Return True when the binary contains the marker written by patch_exe()."""
        executable_path = executable_path or self.executable_path
        try:
            with io.open(executable_path, "rb") as fh:
                return fh.read().find(b"undetected chromedriver") != -1
        except FileNotFoundError:
            return False

    def patch_exe(self):
        """
        Overwrite the first ``{window.cdc...;}`` block in the binary with a
        space-padded replacement of the same length.

        Nothing is written, and a warning is logged, when no such block
        exists or when it is shorter than the replacement.
        """
        start = time.perf_counter()
        logger.info("patching driver executable %s" % self.executable_path)
        replacement = b'{console.log("undetected chromedriver 1337!")}'
        with io.open(self.executable_path, "r+b") as fh:
            content = fh.read()
            match_injected_codeblock = re.search(rb"\{window\.cdc.*?;\}", content)
            if not match_injected_codeblock:
                logger.warning(
                    "something went wrong patching the driver binary. could not find injection code block"
                )
            elif len(match_injected_codeblock[0]) < len(replacement):
                # a longer replacement would shift every following byte
                logger.warning(
                    "injection code block is too short to be replaced, binary left unchanged"
                )
            else:
                target_bytes = match_injected_codeblock[0]
                new_target_bytes = replacement.ljust(len(target_bytes), b" ")
                new_content = content.replace(target_bytes, new_target_bytes)
                logger.debug(
                    "found block:\n%s\nreplacing with:\n%s"
                    % (target_bytes, new_target_bytes)
                )
                fh.seek(0)
                fh.write(new_content)
        logger.debug(
            "patching took us {:.2f} seconds".format(time.perf_counter() - start)
        )

    def __repr__(self):
        return "{0:s}({1:s})".format(
            self.__class__.__name__,
            self.executable_path,
        )

    def __del__(self):
        """Remove the managed binary, retrying for up to 3 seconds."""
        # attributes may be missing when __init__ did not complete
        if getattr(self, "_custom_exe_path", True):
            # if the driver binary is specified by user
            # we assume it is important enough to not delete it
            return
        if getattr(self, "user_multi_procs", False):
            # the binary is kept for reuse, see auto()
            return
        path = getattr(self, "executable_path", None)
        if not path:
            return

        timeout = 3  # stop trying after this many seconds
        deadline = time.monotonic() + timeout
        while time.monotonic() < deadline:
            # we don't want to wait until the end of time
            try:
                os.unlink(path)
                logger.debug("successfully unlinked %s" % path)
                break
            except FileNotFoundError:
                # must precede OSError, of which it is a subclass
                break
            except (OSError, RuntimeError, PermissionError):
                time.sleep(0.01)
                continue

flaresolverr/undetected_chromedriver/reactor.py ADDED Viewed

	@@ -0,0 +1,99 @@

+#!/usr/bin/env python3
+# this module is part of undetected_chromedriver
+import asyncio
+import json
+import logging
+import threading
+logger = logging.getLogger(__name__)
class Reactor(threading.Thread):
    """
    Daemon thread which polls the driver's "performance" log and passes the
    contained messages to the registered handlers.
    """

    def __init__(self, driver: "Chrome"):
        super().__init__()

        self.driver = driver
        self.loop = asyncio.new_event_loop()

        self.lock = threading.Lock()
        # setting this event ends the listen() loop, see ``running``
        self.event = threading.Event()
        self.daemon = True
        self.handlers = {}

    def add_event_handler(self, method_name, callback: callable):
        """
        Register ``callback`` for a method name, replacing an earlier one.

        Parameters
        ----------
        method_name: str
            example "Network.responseReceived" (stored lower-cased);
            a handler registered as "*" receives every message
        callback: callable
            callable which accepts 1 parameter: the message object dictionary

        Returns
        -------
        None
        """
        with self.lock:
            self.handlers[method_name.lower()] = callback

    @property
    def running(self):
        return not self.event.is_set()

    def run(self):
        try:
            asyncio.set_event_loop(self.loop)
            self.loop.run_until_complete(self.listen())
        except Exception as e:
            logger.warning("Reactor.run() => %s", e)

    async def _wait_service_started(self):
        """Sleep in steps while ``driver.service.process.poll()`` is truthy."""
        while True:
            with self.lock:
                service = getattr(self.driver, "service", None)
                process = getattr(service, "process", None) if service else None
                waiting = bool(process and process.poll())
            if not waiting:
                break
            # sleep outside the lock so other threads are not blocked meanwhile
            await asyncio.sleep(self.driver._delay or 0.25)

    async def listen(self):
        """Poll the performance log about once per second while running."""
        while self.running:
            await self._wait_service_started()
            await asyncio.sleep(1)

            try:
                with self.lock:
                    log_entries = self.driver.get_log("performance")

                for entry in log_entries:
                    obj_serialized: str = entry.get("message")
                    obj = json.loads(obj_serialized)
                    message = obj.get("message")
                    method = message.get("method")

                    # a "*" handler takes precedence over a specific one
                    if "*" in self.handlers:
                        await self.loop.run_in_executor(
                            None, self.handlers["*"], message
                        )
                    elif method.lower() in self.handlers:
                        await self.loop.run_in_executor(
                            None, self.handlers[method.lower()], message
                        )

            except Exception as e:
                if "invalid session id" in str(e):
                    pass
                else:
                    logger.debug("exception ignored : %s", e)

flaresolverr/undetected_chromedriver/webelement.py ADDED Viewed

	@@ -0,0 +1,86 @@

+from typing import List
+from selenium.webdriver.common.by import By
+import selenium.webdriver.remote.webelement
+class WebElement(selenium.webdriver.remote.webelement.WebElement):
+    def click_safe(self):
+        super().click()
+        self._parent.reconnect(0.1)
+    def children(
+        self, tag=None, recursive=False
+    ) -> List[selenium.webdriver.remote.webelement.WebElement]:
+        """
+        returns direct child elements of current element
+        :param tag: str,  if supplied, returns <tag> nodes only
+        """
+        script = "return [... arguments[0].children]"
+        if tag:
+            script += ".filter( node => node.tagName === '%s')" % tag.upper()
+        if recursive:
+            return list(_recursive_children(self, tag))
+        return list(self._parent.execute_script(script, self))
+class UCWebElement(WebElement):
+    """
+    Custom WebElement class which makes it easier to view elements when
+    working in an interactive environment.
+    standard webelement repr:
+    <selenium.webdriver.remote.webelement.WebElement (session="85ff0f671512fa535630e71ee951b1f2", element="6357cb55-92c3-4c0f-9416-b174f9c1b8c4")>
+    using this WebElement class:
+    <WebElement(<a class="mobile-show-inline-block mc-update-infos init-ok" href="#" id="main-cat-switcher-mobile">)>
+    """
+    def __init__(self, parent, id_):
+        super().__init__(parent, id_)
+        self._attrs = None
+    @property
+    def attrs(self):
+        if not self._attrs:
+            self._attrs = self._parent.execute_script(
+                """
+                var items = {};
+                for (index = 0; index < arguments[0].attributes.length; ++index)
+                {
+                 items[arguments[0].attributes[index].name] = arguments[0].attributes[index].value
+                };
+                return items;
+                """,
+                self,
+            )
+        return self._attrs
+    def __repr__(self):
+        strattrs = " ".join([f'{k}="{v}"' for k, v in self.attrs.items()])
+        if strattrs:
+            strattrs = " " + strattrs
+        return f"{self.__class__.__name__} <{self.tag_name}{strattrs}>"
+def _recursive_children(element, tag: str = None, _results=None):
+    """
+    returns all children of <element> recursively
+    :param element: `WebElement` object.
+            find children below this <element>
+    :param tag: str = None.
+            if provided, return only <tag> elements. example: 'a', or 'img'
+    :param _results: do not use!
+    """
+    results = _results or set()
+    for element in element.children():
+        if tag:
+            if element.tag_name == tag:
+                results.add(element)
+        else:
+            results.add(element)
+        results |= _recursive_children(element, tag, results)
+    return results

flaresolverr/utils.py ADDED Viewed

	@@ -0,0 +1,376 @@

+import json
+import logging
+import os
+import platform
+import re
+import shutil
+import sys
+import tempfile
+import urllib.parse
+from selenium.webdriver.chrome.webdriver import WebDriver
+import undetected_chromedriver as uc
+# Module-level caches. Each starts as None and is filled on first use by the
+# function named next to it.
+FLARESOLVERR_VERSION = None  # get_flaresolverr_version()
+PLATFORM_VERSION = None  # get_current_platform(); holds os.name
+CHROME_EXE_PATH = None  # get_chrome_exe_path()
+CHROME_MAJOR_VERSION = None  # get_chrome_major_version()
+USER_AGENT = None  # get_user_agent()
+XVFB_DISPLAY = None  # start_xvfb_display()
+PATCHED_DRIVER_PATH = None  # get_webdriver(), once the patched driver has been saved
+def get_config_log_html() -> bool:
+    return os.environ.get('LOG_HTML', 'false').lower() == 'true'
+def get_config_headless() -> bool:
+    return os.environ.get('HEADLESS', 'true').lower() == 'true'
+def get_config_disable_media() -> bool:
+    return os.environ.get('DISABLE_MEDIA', 'false').lower() == 'true'
+def get_flaresolverr_version() -> str:
+    global FLARESOLVERR_VERSION
+    if FLARESOLVERR_VERSION is not None:
+        return FLARESOLVERR_VERSION
+    package_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir, 'package.json')
+    if not os.path.isfile(package_path):
+        package_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'package.json')
+    with open(package_path) as f:
+        FLARESOLVERR_VERSION = json.loads(f.read())['version']
+        return FLARESOLVERR_VERSION
+def get_current_platform() -> str:
+    global PLATFORM_VERSION
+    if PLATFORM_VERSION is not None:
+        return PLATFORM_VERSION
+    PLATFORM_VERSION = os.name
+    return PLATFORM_VERSION
+def create_proxy_extension(proxy: dict) -> str:
+    parsed_url = urllib.parse.urlparse(proxy['url'])
+    scheme = parsed_url.scheme
+    host = parsed_url.hostname
+    port = parsed_url.port
+    username = proxy['username']
+    password = proxy['password']
+    manifest_json = """
+    {
+        "version": "1.0.0",
+        "manifest_version": 3,
+        "name": "Chrome Proxy",
+        "permissions": [
+            "proxy",
+            "tabs",
+            "storage",
+            "webRequest",
+            "webRequestAuthProvider"
+        ],
+        "host_permissions": [
+          "<all_urls>"
+        ],
+        "background": {
+          "service_worker": "background.js"
+        },
+        "minimum_chrome_version": "76.0.0"
+    }
+    """
+    background_js = """
+    var config = {
+        mode: "fixed_servers",
+        rules: {
+            singleProxy: {
+                scheme: "%s",
+                host: "%s",
+                port: %d
+            },
+            bypassList: ["localhost"]
+        }
+    };
+    chrome.proxy.settings.set({value: config, scope: "regular"}, function() {});
+    function callbackFn(details) {
+        return {
+            authCredentials: {
+                username: "%s",
+                password: "%s"
+            }
+        };
+    }
+    chrome.webRequest.onAuthRequired.addListener(
+        callbackFn,
+        { urls: ["<all_urls>"] },
+        ['blocking']
+    );
+    """ % (
+        scheme,
+        host,
+        port,
+        username,
+        password
+    )
+    proxy_extension_dir = tempfile.mkdtemp()
+    with open(os.path.join(proxy_extension_dir, "manifest.json"), "w") as f:
+        f.write(manifest_json)
+    with open(os.path.join(proxy_extension_dir, "background.js"), "w") as f:
+        f.write(background_js)
+    return proxy_extension_dir
+def get_webdriver(proxy: dict = None) -> WebDriver:
+    global PATCHED_DRIVER_PATH, USER_AGENT
+    logging.debug('Launching web browser...')
+    # undetected_chromedriver
+    options = uc.ChromeOptions()
+    options.add_argument('--no-sandbox')
+    options.add_argument('--window-size=1280,1024') # Smaller window for less overhead
+    options.add_argument('--disable-search-engine-choice-screen')
+    options.add_argument('--disable-setuid-sandbox')
+    options.add_argument('--disable-dev-shm-usage')
+    options.add_argument('--no-zygote')
+    options.add_argument('--disable-gpu') # Disable GPU for faster headless boot
+    options.add_argument('--mute-audio')
+    options.add_argument('--disable-notifications')
+    options.add_argument('--disable-popup-blocking')
+    options.add_argument('--disable-extensions')
+    options.add_argument('--disable-blink-features=AutomationControlled')
+    # Force headless and invisibility
+    options.add_argument('--headless=new')
+    IS_ARMARCH = platform.machine().startswith(('arm', 'aarch'))
+    if IS_ARMARCH:
+        options.add_argument('--disable-gpu-sandbox')
+    options.add_argument('--ignore-certificate-errors')
+    options.add_argument('--ignore-ssl-errors')
+    language = os.environ.get('LANG', None)
+    if language is not None:
+        options.add_argument('--accept-lang=%s' % language)
+    # Fix for Chrome 117 | https://github.com/FlareSolverr/FlareSolverr/issues/910
+    if USER_AGENT is not None:
+        options.add_argument('--user-agent=%s' % USER_AGENT)
+    proxy_extension_dir = None
+    if proxy and all(key in proxy for key in ['url', 'username', 'password']):
+        proxy_extension_dir = create_proxy_extension(proxy)
+        options.add_argument("--disable-features=DisableLoadExtensionCommandLineSwitch")
+        options.add_argument("--load-extension=%s" % os.path.abspath(proxy_extension_dir))
+    elif proxy and 'url' in proxy:
+        proxy_url = proxy['url']
+        logging.debug("Using webdriver proxy: %s", proxy_url)
+        options.add_argument('--proxy-server=%s' % proxy_url)
+    # note: headless mode is detected (headless = True)
+    # we launch the browser in head-full mode with the window hidden
+    windows_headless = True if os.name == 'nt' else False
+    if get_config_headless():
+        if os.name != 'nt':
+            start_xvfb_display()
+    # Override for absolute invisibility on Windows
+    if os.name == 'nt':
+        options.add_argument('--hide-scrollbars')
+        options.add_argument('--disable-logging')
+        options.add_argument('--log-level=3')
+    # if we are inside the Docker container, we avoid downloading the driver
+    driver_exe_path = None
+    version_main = None
+    if os.path.exists("/app/chromedriver"):
+        # running inside Docker
+        driver_exe_path = "/app/chromedriver"
+    else:
+        version_main = get_chrome_major_version()
+        if PATCHED_DRIVER_PATH is not None:
+            driver_exe_path = PATCHED_DRIVER_PATH
+    # detect chrome path
+    browser_executable_path = get_chrome_exe_path()
+    # CRITICAL: Clean up undetected_chromedriver cache on Windows to avoid WinError 183
+    if os.name == 'nt':
+        try:
+            uc_path = os.path.join(os.environ.get('APPDATA', ''), 'undetected_chromedriver')
+            if os.path.exists(uc_path):
+                # Try to remove the file that usually causes WinError 183
+                target_exe = os.path.join(uc_path, 'undetected_chromedriver.exe')
+                if os.path.exists(target_exe):
+                    try: os.remove(target_exe)
+                    except: pass
+        except: pass
+    # downloads and patches the chromedriver
+    # if we don't set driver_executable_path it downloads, patches, and deletes the driver each time
+    try:
+        driver = uc.Chrome(options=options, browser_executable_path=browser_executable_path,
+                           driver_executable_path=driver_exe_path, version_main=version_main,
+                           windows_headless=windows_headless, headless=get_config_headless())
+    except Exception as e:
+        logging.error("Error starting Chrome: %s" % e)
+        # No point in continuing if we cannot retrieve the driver
+        raise e
+    # save the patched driver to avoid re-downloads
+    if driver_exe_path is None:
+        try:
+            target_path = os.path.join(driver.patcher.data_path, driver.patcher.exe_name)
+            if target_path != driver.patcher.executable_path:
+                # On Windows, we might get WinError 183 if the file is locked or exists
+                if os.path.exists(target_path):
+                    try: os.remove(target_path)
+                    except: pass
+                shutil.copy(driver.patcher.executable_path, target_path)
+            PATCHED_DRIVER_PATH = target_path
+        except Exception as e:
+            logging.warning(f"Failed to save patched driver: {e}")
+    # clean up proxy extension directory
+    if proxy_extension_dir is not None:
+        shutil.rmtree(proxy_extension_dir)
+    # selenium vanilla
+    # options = webdriver.ChromeOptions()
+    # options.add_argument('--no-sandbox')
+    # options.add_argument('--window-size=1920,1080')
+    # options.add_argument('--disable-setuid-sandbox')
+    # options.add_argument('--disable-dev-shm-usage')
+    # driver = webdriver.Chrome(options=options)
+    return driver
+def get_chrome_exe_path() -> str:
+    global CHROME_EXE_PATH
+    if CHROME_EXE_PATH is not None:
+        return CHROME_EXE_PATH
+    # linux pyinstaller bundle
+    chrome_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'chrome', "chrome")
+    if os.path.exists(chrome_path):
+        if not os.access(chrome_path, os.X_OK):
+            raise Exception(f'Chrome binary "{chrome_path}" is not executable. '
+                            f'Please, extract the archive with "tar xzf <file.tar.gz>".')
+        CHROME_EXE_PATH = chrome_path
+        return CHROME_EXE_PATH
+    # windows pyinstaller bundle
+    chrome_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'chrome', "chrome.exe")
+    if os.path.exists(chrome_path):
+        CHROME_EXE_PATH = chrome_path
+        return CHROME_EXE_PATH
+    # system
+    CHROME_EXE_PATH = uc.find_chrome_executable()
+    return CHROME_EXE_PATH
+def get_chrome_major_version() -> str:
+    global CHROME_MAJOR_VERSION
+    if CHROME_MAJOR_VERSION is not None:
+        return CHROME_MAJOR_VERSION
+    if os.name == 'nt':
+        # Example: '104.0.5112.79'
+        try:
+            complete_version = extract_version_nt_executable(get_chrome_exe_path())
+        except Exception:
+            try:
+                complete_version = extract_version_nt_registry()
+            except Exception:
+                # Example: '104.0.5112.79'
+                complete_version = extract_version_nt_folder()
+    else:
+        chrome_path = get_chrome_exe_path()
+        process = os.popen(f'"{chrome_path}" --version')
+        # Example 1: 'Chromium 104.0.5112.79 Arch Linux\n'
+        # Example 2: 'Google Chrome 104.0.5112.79 Arch Linux\n'
+        complete_version = process.read()
+        process.close()
+    CHROME_MAJOR_VERSION = complete_version.split('.')[0].split(' ')[-1]
+    return CHROME_MAJOR_VERSION
+def extract_version_nt_executable(exe_path: str) -> str:
+    import pefile
+    pe = pefile.PE(exe_path, fast_load=True)
+    pe.parse_data_directories(
+        directories=[pefile.DIRECTORY_ENTRY["IMAGE_DIRECTORY_ENTRY_RESOURCE"]]
+    )
+    return pe.FileInfo[0][0].StringTable[0].entries[b"FileVersion"].decode('utf-8')
+def extract_version_nt_registry() -> str:
+    stream = os.popen(
+        'reg query "HKLM\\SOFTWARE\\Wow6432Node\\Microsoft\\Windows\\CurrentVersion\\Uninstall\\Google Chrome"')
+    output = stream.read()
+    google_version = ''
+    for letter in output[output.rindex('DisplayVersion    REG_SZ') + 24:]:
+        if letter != '\n':
+            google_version += letter
+        else:
+            break
+    return google_version.strip()
+def extract_version_nt_folder() -> str:
+    # Check if the Chrome folder exists in the x32 or x64 Program Files folders.
+    for i in range(2):
+        path = 'C:\\Program Files' + (' (x86)' if i else '') + '\\Google\\Chrome\\Application'
+        if os.path.isdir(path):
+            paths = [f.path for f in os.scandir(path) if f.is_dir()]
+            for path in paths:
+                filename = os.path.basename(path)
+                pattern = r'\d+\.\d+\.\d+\.\d+'
+                match = re.search(pattern, filename)
+                if match and match.group():
+                    # Found a Chrome version.
+                    return match.group(0)
+    return ''
+def get_user_agent(driver=None) -> str:
+    global USER_AGENT
+    if USER_AGENT is not None:
+        return USER_AGENT
+    try:
+        if driver is None:
+            driver = get_webdriver()
+        USER_AGENT = driver.execute_script("return navigator.userAgent")
+        # Fix for Chrome 117 | https://github.com/FlareSolverr/FlareSolverr/issues/910
+        USER_AGENT = re.sub('HEADLESS', '', USER_AGENT, flags=re.IGNORECASE)
+        return USER_AGENT
+    except Exception as e:
+        raise Exception("Error getting browser User-Agent. " + str(e))
+    finally:
+        if driver is not None:
+            if PLATFORM_VERSION == "nt":
+                driver.close()
+            driver.quit()
+def start_xvfb_display():
+    global XVFB_DISPLAY
+    if XVFB_DISPLAY is None:
+        from xvfbwrapper import Xvfb
+        XVFB_DISPLAY = Xvfb()
+        XVFB_DISPLAY.start()
+def object_to_dict(_object):
+    json_dict = json.loads(json.dumps(_object, default=lambda o: o.__dict__))
+    # remove hidden fields
+    return {k: v for k, v in json_dict.items() if not k.startswith('__')}

keep_alive.py ADDED Viewed

	@@ -0,0 +1,47 @@

+"""
+Keep-Alive Service to prevent Render.com from sleeping
+Pings the server every 10 minutes to maintain activity
+"""
+import asyncio
+import httpx
+import logging
+from datetime import datetime
+logger = logging.getLogger("keep_alive")
+class KeepAliveService:
+    def __init__(self, base_url: str = "http://localhost:7860"):
+        self.base_url = base_url
+        self.running = False
+        self.ping_interval = 600  # 10 minutes
+    async def start(self):
+        """Start the keep-alive service"""
+        self.running = True
+        logger.info("🔄 Keep-Alive service started (pinging every 10 minutes)")
+        while self.running:
+            try:
+                await asyncio.sleep(self.ping_interval)
+                await self._ping()
+            except Exception as e:
+                logger.error(f"Keep-Alive error: {e}")
+    async def _ping(self):
+        """Send a ping to keep the service alive"""
+        try:
+            async with httpx.AsyncClient(timeout=10.0) as client:
+                response = await client.get(f"{self.base_url}/health")
+                if response.status_code == 200:
+                    logger.info(f"✅ Keep-Alive ping successful at {datetime.now().strftime('%H:%M:%S')}")
+                else:
+                    logger.warning(f"⚠️ Keep-Alive ping returned {response.status_code}")
+        except Exception as e:
+            logger.warning(f"Keep-Alive ping failed: {e}")
+    def stop(self):
+        """Stop the keep-alive service"""
+        self.running = False
+        logger.info("Keep-Alive service stopped")
+keep_alive = KeepAliveService()

main.py ADDED Viewed

	@@ -0,0 +1,352 @@

+import logging
+import time
+from typing import List, Optional
+from fastapi import FastAPI, Request, HTTPException, Query
+from fastapi.responses import JSONResponse, FileResponse, StreamingResponse, RedirectResponse
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.middleware.gzip import GZipMiddleware
+import httpx
+from scraper.engine import scraper
+from downloader import downloader
+import os
+import re
+from urllib.parse import unquote, quote
+from fastapi.staticfiles import StaticFiles
+from database import init_db
+from keep_alive import keep_alive
+import asyncio
+import io
+# Configure logging: INFO level, timestamped "[LEVEL] logger-name: message" lines
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
+    datefmt="%Y-%m-%d %H:%M:%S",
+)
+# Logger used by every endpoint in this module
+logger = logging.getLogger("backend")
+# The FastAPI application; routes and middleware are attached to it below
+app = FastAPI(title="MEIH Movies API", version="2.0.0")
+# --- Simple Caching Layer ---
+class MemoryCache:
+    def __init__(self):
+        self._cache = {}
+    def get(self, key: str):
+        item = self._cache.get(key)
+        if item:
+            expire_time, data = item
+            if time.time() < expire_time:
+                return data
+            else:
+                del self._cache[key]
+        return None
+    def set(self, key: str, data, ttl_seconds: int = 600): # Default 10 mins
+        self._cache[key] = (time.time() + ttl_seconds, data)
+cache = MemoryCache()
+async def warm_scraper():
+    """Warms up the scraper by making an initial request to sync cookies."""
+    logger.info("🔥 Warming up scraper in background...")
+    try:
+        # Give services a few more seconds to be truly ready
+        await asyncio.sleep(5)
+        await scraper.fetch_home(page=1)
+        logger.info("✅ Scraper warmed up and cookies synced")
+    except Exception as e:
+        logger.warning(f"⚠️ Scraper warmup failed (will retry on first request): {e}")
+@app.on_event("startup")
+async def startup_event():
+    await init_db()
+    logger.info("🚀 Database initialized and ready")
+    # Detect if running on Hugging Face
+    is_hf = os.environ.get("SPACE_ID") is not None or os.environ.get("HF_SPACE") is not None
+    if not is_hf:
+        # Start Keep-Alive service (only for non-HF environments)
+        asyncio.create_task(keep_alive.start())
+        # Start Warm-up service
+        asyncio.create_task(warm_scraper())
+        # Start Nitro Pre-fetch (Populates cache in background)
+        if hasattr(scraper, '_turbo_prefetch'):
+            asyncio.create_task(scraper._turbo_prefetch())
+        logger.info("🔄 Background services activated")
+    else:
+        logger.info("🤗 Running on Hugging Face - Lightweight mode enabled")
+        # Just warm up the scraper without heavy pre-fetching
+        asyncio.create_task(warm_scraper())
+# Enable CORS for frontend
+# NOTE(review): every origin, method and header is allowed together with
+# credentials — confirm this is intended before exposing the API publicly.
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+# Compress responses of at least 1000 bytes
+app.add_middleware(GZipMiddleware, minimum_size=1000)
+@app.get("/")
+async def root():
+    return {
+        "status": "online",
+        "engine": "Nitro-Power Larooza Engine",
+        "engine_status": "WARM" if scraper._cookies_synced else "COLD",
+        "cached_keys": list(cache._cache.keys())
+    }
+@app.get("/latest")
+async def get_latest(page: int = 1):
+    cache_key = f"latest_{page}"
+    cached = cache.get(cache_key)
+    if cached:
+        return cached
+    try:
+        items = await scraper.fetch_home(page=page)
+        if items:
+            cache.set(cache_key, items)
+        return items
+    except Exception as e:
+        logger.error(f"Error fetching latest: {e}")
+        raise HTTPException(status_code=500, detail=str(e))
+@app.get("/category/{cat_id}")
+async def get_category(cat_id: str, page: int = 1):
+    cache_key = f"cat_{cat_id}_{page}"
+    cached = cache.get(cache_key)
+    if cached:
+        return cached
+    try:
+        items = await scraper.fetch_category(cat_id, page=page)
+        if items:
+            cache.set(cache_key, items)
+        return items
+    except Exception as e:
+        logger.error(f"Error fetching category {cat_id}: {e}")
+        raise HTTPException(status_code=500, detail=str(e))
+@app.get("/search")
+async def search(q: str):
+    cache_key = f"search_{q}"
+    cached = cache.get(cache_key)
+    if cached:
+        return cached
+    try:
+        items = await scraper.search(q)
+        if items:
+            cache.set(cache_key, items, ttl_seconds=3600) # Search results cache longer
+        return items
+    except Exception as e:
+        logger.error(f"Error searching for {q}: {e}")
+        raise HTTPException(status_code=500, detail=str(e))
+@app.get("/details/{safe_id}")
+async def get_details(safe_id: str):
+    cache_key = f"details_{safe_id}"
+    cached = cache.get(cache_key)
+    if cached:
+        return cached
+    try:
+        details = await scraper.fetch_details(safe_id)
+        if not details:
+            return JSONResponse(status_code=404, content={"error": "Content not found"})
+        cache.set(cache_key, details, ttl_seconds=86400) # Details cache for 24h
+        return details
+    except Exception as e:
+        logger.error(f"Error fetching details for {safe_id}: {e}")
+        raise HTTPException(status_code=500, detail=str(e))
+@app.get("/proxy/image")
+async def proxy_image(url: str):
+    if not url:
+        raise HTTPException(status_code=400, detail="URL is required")
+    url = unquote(url)
+    # --- Image Disk Cache ---
+    cache_dir = os.path.join(base_dir, "cache", "images")
+    os.makedirs(cache_dir, exist_ok=True)
+    # Generate simple hash for filename
+    import hashlib
+    url_hash = hashlib.md5(url.encode()).hexdigest()
+    cache_path = os.path.join(cache_dir, f"{url_hash}.img")
+    # 1. Check if cached
+    if os.path.exists(cache_path):
+        # Check cache age (optional - 1 week)
+        if time.time() - os.path.getmtime(cache_path) < 604800:
+            return FileResponse(
+                cache_path,
+                media_type="image/jpeg", # Approximate, browser will handle
+                headers={"Cache-Control": "public, max-age=31536000"}
+            )
+    try:
+        # Using follow_redirects and a longer timeout for images
+        async with httpx.AsyncClient(timeout=20.0, follow_redirects=True) as client:
+            resp = await client.get(url, headers={"User-Agent": scraper.headers["User-Agent"]})
+            if resp.status_code == 200:
+                # Save to cache
+                content = resp.content
+                with open(cache_path, "wb") as f:
+                    f.write(content)
+                # Return the image stream directly
+                return StreamingResponse(
+                    io.BytesIO(content),
+                    media_type=resp.headers.get("Content-Type", "image/jpeg"),
+                    headers={"Cache-Control": "public, max-age=31536000"}
+                )
+            else:
+                logger.warning(f"Failed to proxy image {url} (Status: {resp.status_code})")
+                return JSONResponse(status_code=resp.status_code, content={"error": f"Failed (Status {resp.status_code})"})
+    except httpx.TimeoutException:
+        logger.warning(f"Timeout proxying image: {url}")
+        return JSONResponse(status_code=504, content={"error": "Image timeout"})
+    except Exception as e:
+        logger.error(f"Proxy image error for {url}: {type(e).__name__} - {str(e)}")
+        return JSONResponse(status_code=500, content={"error": str(e)})
+@app.get("/download/info")
+async def get_download_info(url: str):
+    try:
+        info = await downloader.get_info(url)
+        return info
+    except Exception as e:
+        logger.error(f"Download info error for {url}: {e}")
+        raise HTTPException(status_code=500, detail=str(e))
+@app.get("/download/file")
+async def download_file(url: str, filename: str = "video.mp4"):
+    """Handles file downloads, proxying if necessary to bypass IP blocks or hotlink protection."""
+    if not url:
+        raise HTTPException(status_code=400, detail="URL is required")
+    url = unquote(url)
+    # Domains that REQUIRE proxying (IP-bound or strict hotlink protection)
+    proxy_domains = [
+        "googlevideo.com",
+        "manifest.googlevideo.com",
+        "larozavideo.net",
+        "larooza.site",
+        "larooza.mom",
+        "laroza-tv.net",
+        "youtube.com",
+        "youtu.be"
+    ]
+    should_proxy = any(domain in url for domain in proxy_domains)
+    if should_proxy:
+        logger.info(f"🛡️ Proxying download: {filename[:50]}...")
+        # Clean filename for the ASCII part of Content-Disposition
+        # Remove non-ASCII characters for the fallback filename
+        ascii_filename = re.sub(r'[^\x00-\x7F]+', '_', filename)
+        encoded_filename = quote(filename)
+        async def stream_generator():
+            async with httpx.AsyncClient(timeout=None, follow_redirects=True) as client:
+                try:
+                    async with client.stream("GET", url, headers={"User-Agent": scraper.headers["User-Agent"]}) as resp:
+                        if resp.status_code != 200:
+                            logger.error(f"Proxy source returned {resp.status_code}")
+                            return
+                        # We can't easily set Content-Length here because StreamingResponse
+                        # starts before we have all chunks, but we can set it in the outer response
+                        async for chunk in resp.aiter_bytes(chunk_size=1024*1024):
+                            yield chunk
+                except Exception as e:
+                    logger.error(f"Streaming error: {e}")
+        # Get initial headers to find content length/type if possible
+        try:
+            async with httpx.AsyncClient(timeout=10.0, follow_redirects=True) as client:
+                head_resp = await client.head(url, headers={"User-Agent": scraper.headers["User-Agent"]})
+                content_length = head_resp.headers.get("Content-Length")
+                content_type = head_resp.headers.get("Content-Type", "video/mp4")
+        except:
+            content_length = None
+            content_type = "video/mp4"
+        headers = {
+            "Content-Disposition": f"attachment; filename=\"{ascii_filename}\"; filename*=UTF-8''{encoded_filename}",
+            "Access-Control-Expose-Headers": "Content-Disposition"
+        }
+        if content_length:
+            headers["Content-Length"] = content_length
+        return StreamingResponse(stream_generator(), media_type=content_type, headers=headers)
+    # For other sources, a simple redirect is much faster and saves server bandwidth
+    return RedirectResponse(url=url)
+@app.get("/health")
+async def health():
+    # Check FlareSolverr
+    fs_status = "OFFLINE"
+    try:
+        # Increase timeout as solver might be busy
+        async with httpx.AsyncClient(timeout=5.0) as client:
+            resp = await client.get("http://localhost:8191/health")
+            if resp.status_code == 200:
+                fs_status = "ONLINE"
+    except:
+        pass
+    return {
+        "backend": "ONLINE",
+        "flaresolverr": fs_status,
+        "scraper_sync": scraper._cookies_synced,
+        "timestamp": time.time()
+    }
+# --- Frontend Mounting ---
+# This ensures that our React app is served directly by FastAPI in production
+# Check both relative and same-level structures for Docker/Local compatibility
+base_dir = os.path.dirname(__file__)
+frontend_path = os.path.join(base_dir, "meih-netflix-clone", "dist")
+if not os.path.exists(frontend_path):
+    # Try one level up (local dev structure)
+    frontend_path = os.path.join(base_dir, "..", "meih-netflix-clone", "dist")
+if os.path.exists(frontend_path):
+    # Assets are usually in dist/assets and referenced as /assets/ in Vite
+    assets_path = os.path.join(frontend_path, "assets")
+    if os.path.exists(assets_path):
+        app.mount("/assets", StaticFiles(directory=assets_path), name="assets")
+    @app.get("/{full_path:path}")
+    async def serve_frontend(full_path: str):
+        # Prevent infinite recursion for API routes if someone hits a wrong URL
+        if full_path.startswith(("api/", "latest", "category/", "search", "details", "proxy", "download", "health")):
+            return JSONResponse(status_code=404, content={"error": "Not Found"})
+        # If the path starts with api/ or other backend routes, it should have been caught above
+        # Otherwise, serve the main index.html for React Router to handle
+        file_path = os.path.join(frontend_path, full_path)
+        if os.path.exists(file_path) and os.path.isfile(file_path):
+            return FileResponse(file_path)
+        return FileResponse(os.path.join(frontend_path, "index.html"))
+else:
+    logger.warning(f"Frontend dist folder not found at {frontend_path}. Frontend serving disabled.")
+if __name__ == "__main__":
+    import uvicorn
+    # Use port 7860 for Hugging Face Spaces compatibility
+    uvicorn.run(app, host="0.0.0.0", port=7860)

package.json ADDED Viewed

	@@ -0,0 +1,12 @@

+{
+  "name": "meih-movies-api",
+  "version": "1.0.0",
+  "description": "Nitro-powered movie scraping API",
+  "main": "main.py",
+  "scripts": {
+    "start": "bash start.sh"
+  },
+  "engines": {
+    "node": ">=18.x"
+  }
+}

requirements.txt ADDED Viewed

	@@ -0,0 +1,14 @@

+fastapi
+uvicorn
+httpx[http2]
+beautifulsoup4
+curl-cffi
+yt-dlp
+pydantic
+python-multipart
+aiohttp
+aiosqlite
+certifi
+websockets
+packaging
+setuptools

scraper/engine.py ADDED Viewed

	@@ -0,0 +1,996 @@

+import asyncio
+import httpx
+import re
+import logging
+import base64
+import random
+import os
+import time
+from typing import List, Dict, Optional
+from bs4 import BeautifulSoup
+from curl_cffi.requests import AsyncSession
+from urllib.parse import urljoin, quote
+from scraper.proxy_fetcher import proxy_fetcher
+# Optional dependencies for heavy bypasses
+try:
+    import undetected_chromedriver as uc
+    from selenium.webdriver.common.by import By
+    from selenium.webdriver.support.ui import WebDriverWait
+    from selenium.webdriver.support import expected_conditions as EC
+    HAS_SELENIUM = True
+except ImportError:
+    HAS_SELENIUM = False
+    logger.warning("⚠️ Selenium/Undetected-Chromedriver not installed. Nuclear bypass will be disabled.")
+# Clean, strictly used logger
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger("scraper")
+class LaroozaScraper:
+    """Scraper for the Larooza video site and its mirror domains.
+
+    The class-level constants below hold the known mirrors, the default
+    base/target URLs, and the tables used to translate stable category
+    aliases into the site's own category ids.
+    """
+    # Known mirror domains; _optimize_connection probes each one for speed.
+    MIRRORS = ["https://q.larozavideo.net", "https://larooza.mom", "https://larooza.site", "https://m.laroza-tv.net"]
+    # Defaults; _optimize_connection overrides both on the instance once a
+    # faster mirror has been found.
+    BASE_URL = "https://q.larozavideo.net"
+    TARGET_URL = "https://q.larozavideo.net/newvideos1.php"
+    # NOTE(review): class-level mutable dict, so it is shared by every
+    # instance; it is not referenced in the visible part of this class.
+    _blacklisted_mirrors = {}
+    # Permanent Aliases -> Keywords search
+    # _discover_categories matches these keywords against the lower-cased
+    # text of category links to learn each alias's current category id.
+    CATEGORY_KEYWORDS = {
+        "arabic-movies": ["أفلام عربية", "افلام عربية", "افلام عربي", "arabic-movies33"],
+        "english-movies": ["افلام اجنبية", "أفلام أجنبية", "افلام اجنبي", "أجنبي", "all_movies_13"],
+        "indian-movies": ["افلام هندي", "أفلام هندية", "هندي", "indian-movies9"],
+        "anime-movies": ["افلام انمي", "أفلام أنمي", "انمي", "anime-movies-7"],
+        "dubbed-movies": ["افلام مدبلجة", "أفلام مدبلجة", "مدبلج", "7-aflammdblgh"],
+        "turkish-series": ["مسلسلات تركية", "تركي", "turkish-3isk-seriess47"],
+        "arabic-series": ["مسلسلات عربية", "عربي", "arabic-series46"],
+        "english-series": ["مسلسلات اجنبية", "أجنبي", "english-series10"],
+        "ramadan-2025": ["رمضان 2025", "13-ramadan-2025"],
+        "ramadan-2024": ["رمضان 2024", "28-ramadan-2024"],
+        "ramadan-2023": ["رمضان 2023", "10-ramadan-2023"],
+        "asian-movies": ["آسيوي", "اسيوي", "آسيوية", "6-asian-movies"],
+        "asian-series": ["مسلسلات اسياوية", "اسياوية", "6-asya"],
+        "turkish-movies": ["افلام تركية", "أفلام تركية", "8-aflam3isk"],
+        "anime-series": ["مسلسلات انمي", "كرتون", "6-anime-series"],
+        "indian-series": ["مسلسلات هندية", "11indian-series"],
+        "tv-programs": ["برامج تلفزيون", "tv-programs12"],
+        "plays": ["مسرحيات", "masrh-5"]
+    }
+    # Manual Fallbacks for reliability
+    # _resolve_cat_id consults this table when the dynamically discovered
+    # map has no entry for an alias.
+    HARDCODED_FALLBACKS = {
+        "arabic-movies": "arabic-movies33",
+        "english-movies": "all_movies_13",
+        "indian-movies": "indian-movies9",
+        "asian-movies": "6-asian-movies",
+        "anime-movies": "anime-movies-7",
+        "dubbed-movies": "7-aflammdblgh",
+        "turkish-movies": "8-aflam3isk",
+        "arabic-series": "arabic-series46",
+        "ramadan-2025": "13-ramadan-2025",
+        "ramadan-2024": "28-ramadan-2024",
+        "ramadan-2023": "10-ramadan-2023",
+        "english-series": "english-series10",
+        "turkish-series": "turkish-3isk-seriess47",
+        "indian-series": "11indian-series",
+        "tv-programs": "tv-programs12",
+        "plays": "masrh-5",
+        "anime-series": "6-anime-series",
+        "asian-series": "6-asya"
+    }
+    def __init__(self):
+        # Primary fetcher: curl-cffi (Fastest, TLS Impersonation)
+        # Using chrome120 and disabling SSL verify for maximum compatibility
+        self.session = AsyncSession(impersonate="chrome120", timeout=30, verify=False)
+        self._cookies_synced = False
+        self._last_pw_solve = 0
+        self._ua_synced = None
+        self._chrome_version = None
+        self._domain_lock = asyncio.Lock()
+        self._warming_lock = asyncio.Lock()
+        self._proxy_refresh_interval = 1800  # 30 minutes
+        self._proxy_refresh_time = 0
+        self._semaphore = asyncio.Semaphore(5) # Reduced from 15 for stability
+        self._optimization_started = False
+        self._is_prefetching = False
+        self._domain_detected = False
+        # Hybrid Configuration
+        self.REMOTE_SOLVER_URL = "https://meih-movies-api.onrender.com/remote-fetch"
+        self.IS_RENDER = os.environ.get("RENDER") is not None
+        self.IS_HUGGINGFACE = os.environ.get("SPACE_ID") is not None
+        # Free Proxy Pool for Hugging Face (to bypass IP bans)
+        self._free_proxy_pool = []
+        self._proxy_pool_last_refresh = 0
+        self.headers = {
+            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
+            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
+            "Accept-Language": "ar,en-US;q=0.9,en;q=0.8",
+            "Accept-Encoding": "gzip, deflate, br",
+            "Referer": "https://www.google.com/",
+            "Connection": "keep-alive",
+            "Sec-Fetch-Dest": "document",
+            "Sec-Fetch-Mode": "navigate",
+            "Sec-Fetch-Site": "cross-site",
+        }
+        self._session_initialized = False
+        self._session_warmed_at = 0
+        self._httpx_client = None
+        # --- Proxy Rotation System ---
+        proxy_str = os.getenv("PROXY_LIST", "")
+        self.proxies = [p.strip() for p in proxy_str.split(",") if p.strip()]
+        self._current_proxy_idx = 0
+        if self.proxies:
+            logger.info(f"✓ Proxy rotation enabled with {len(self.proxies)} endpoints")
+        self._category_map = {}
+        self._last_discovery = 0
+        self._discovery_lock = asyncio.Lock()
+        # --- Mirror & Performance ---
+        self._cache = {} # {url: (timestamp, data)}
+        self._cache_ttl = 3600 # 1 hour for data
+        self._free_proxies = []
+        self._optimization_started = False
+        self._uc_lock = asyncio.Lock()
+        self._solver_lock = asyncio.Lock() # Guard against multiple solvers
+        # We'll start optimization on the first request to avoid "no running loop" error
+    async def _optimize_connection(self):
+        """Find the fastest mirror and warm up the engine"""
+        # 1. Check if we already have a reasonably fresh fastest mirror
+        now = time.time()
+        if hasattr(self, '_fastest_mirror_detected_at') and now - self._fastest_mirror_detected_at < 3600:
+            return
+        logger.info("🔍 Testing mirror speeds (Optimized)...")
+        async def test_mirror(mirror):
+            try:
+                # very aggressive timeout for discovery
+                start = time.time()
+                test_url = f"{mirror}/newvideos1.php"
+                async with httpx.AsyncClient(timeout=1.5, follow_redirects=True, verify=False) as client:
+                    resp = await client.get(test_url)
+                    if resp.status_code == 200:
+                        return (time.time() - start, mirror)
+            except:
+                pass
+            return (999, mirror)
+        results = await asyncio.gather(*(test_mirror(m) for m in self.MIRRORS))
+        results.sort()
+        min_time, fastest_mirror = results[0]
+        if min_time < 999:
+            logger.info(f"⚡ Fastest mirror: {fastest_mirror} ({min_time:.2f}s)")
+            self.BASE_URL = fastest_mirror
+            self.TARGET_URL = f"{fastest_mirror}/newvideos1.php"
+            self._fastest_mirror_detected_at = now
+        else:
+            logger.warning("⚠️ No mirrors responded quickly, using default.")
+            self._fastest_mirror_detected_at = now - 3300 # Retry sooner
+    async def _refresh_free_proxies(self):
+        """Fetch free proxies from public APIs (for cloud deployments)"""
+        # Enable on both Hugging Face and Render.com
+        if not (self.IS_HUGGINGFACE or self.IS_RENDER):
+            return
+        now = time.time()
+        if now - self._proxy_pool_last_refresh < 300:  # Refresh every 5 minutes
+            return
+        logger.info("🔄 Refreshing free proxy pool...")
+        self._proxy_pool_last_refresh = now
+        proxy_sources = [
+            "https://api.proxyscrape.com/v2/?request=get&protocol=http&timeout=10000&country=all&ssl=all&anonymity=all",
+            "https://www.proxy-list.download/api/v1/get?type=http",
+        ]
+        new_proxies = []
+        for source in proxy_sources:
+            try:
+                async with httpx.AsyncClient(timeout=10.0) as client:
+                    resp = await client.get(source)
+                    if resp.status_code == 200:
+                        proxies = resp.text.strip().split('\n')
+                        for proxy in proxies[:10]:  # Take first 10 from each source
+                            proxy = proxy.strip()
+                            if proxy and ':' in proxy:
+                                new_proxies.append(f"http://{proxy}")
+            except Exception as e:
+                logger.warning(f"Failed to fetch proxies from {source}: {e}")
+        if new_proxies:
+            self._free_proxy_pool = new_proxies
+            logger.info(f"✅ Loaded {len(new_proxies)} free proxies")
+        else:
+            logger.warning("⚠️ No free proxies available")
+    async def _discover_categories(self, force=False):
+        """Build the category map dynamically from the homepage"""
+        async with self._discovery_lock:
+            if not force and time.time() - self._last_discovery < 3600: # Cache for 1 hour
+                return
+            logger.info("Refreshing category mapping...")
+            html = await self._get_html(self.BASE_URL)
+            if not html: return
+            soup = BeautifulSoup(html, 'html.parser')
+            new_map = {}
+            # Find all category links
+            for a in soup.find_all('a', href=True):
+                href = a['href']
+                if 'cat=' not in href: continue
+                cat_id = href.split('cat=')[-1].split('&')[0]
+                text = a.get_text(strip=True).lower()
+                # Match against keywords
+                for alias, keywords in self.CATEGORY_KEYWORDS.items():
+                    if alias not in new_map:
+                        if any(k in text for k in keywords):
+                            new_map[alias] = cat_id
+            if new_map:
+                self._category_map = new_map
+                self._last_discovery = time.time()
+                logger.info(f"✓ Mapped {len(new_map)} categories: {new_map}")
+    async def _resolve_cat_id(self, cat_id: str) -> str:
+        """Resolves an alias to a real ID, or returns the original if not an alias"""
+        await self._discover_categories()
+        # 1. Check dynamic map
+        if cat_id in self._category_map:
+            return self._category_map[cat_id]
+        # 2. Check hardcoded fallbacks if dynamic failed
+        if cat_id in self.HARDCODED_FALLBACKS:
+            return self.HARDCODED_FALLBACKS[cat_id]
+        return cat_id
+    async def _warm_session(self):
+        """Warm up session with the detected working mirror"""
+        if not self._domain_detected:
+            # We already set defaults in __init__ / class, just confirm
+            logger.info(f"🚀 Targeting exclusive source: {self.TARGET_URL}")
+            self._domain_detected = True
+        if not self._session_initialized:
+            self._session_initialized = True # Mark as init even if basic get fails, as PW will solve it
+    async def _refresh_free_proxies(self):
+        """Refresh free proxy list if needed"""
+        if time.time() - self._proxy_refresh_time > self._proxy_refresh_interval:
+            logger.info("Refreshing free proxy pool...")
+            self._free_proxies = await proxy_fetcher.get_working_proxies(max_count=15)
+            self._proxy_refresh_time = time.time()
+            logger.info(f"Loaded {len(self._free_proxies)} working free proxies")
+    def _get_proxy(self) -> Optional[str]:
+        # On cloud platforms (HF or Render), prioritize free proxy pool
+        if (self.IS_HUGGINGFACE or self.IS_RENDER) and self._free_proxy_pool:
+            proxy = self._free_proxy_pool[self._current_proxy_idx % len(self._free_proxy_pool)]
+            self._current_proxy_idx += 1
+            return proxy
+        # Try free proxies first (legacy proxy_fetcher)
+        if self._free_proxies:
+            proxy = self._free_proxies[self._current_proxy_idx % len(self._free_proxies)]
+            self._current_proxy_idx += 1
+            return proxy
+        # Fallback to configured proxies
+        if not self.proxies: return None
+        proxy = self.proxies[self._current_proxy_idx % len(self.proxies)]
+        self._current_proxy_idx += 1
+        return proxy
+    async def _get_html_with_undetected_chrome(self, url: str) -> Optional[str]:
+        """The 'NUCLEAR Option': Undetected-Chromedriver with safety locks for Windows"""
+        if not HAS_SELENIUM:
+            logger.error("❌ Cannot use UC: Selenium/Undetected-Chromedriver not installed.")
+            return None
+        async with self._uc_lock:
+            logger.info(f"💣 Launching Undetected-Chrome NUCLEAR Bypass for {url}...")
+            def get_chrome_version():
+                try:
+                    import winreg
+                    key = winreg.OpenKey(winreg.HKEY_CURRENT_USER, r'Software\Google\Chrome\BLBeacon')
+                    version, _ = winreg.QueryValueEx(key, 'version')
+                    return int(version.split('.')[0])
+                except:
+                    return 120 # Fallback
+            if not self._chrome_version:
+                self._chrome_version = get_chrome_version()
+            def chrome_task():
+                driver = None
+                try:
+                    options = uc.ChromeOptions()
+                    options.add_argument('--headless')
+                    options.add_argument('--no-sandbox')
+                    options.add_argument('--disable-dev-shm-usage')
+                    options.add_argument('--disable-gpu')
+                    options.add_argument('--window-size=1280,1024')
+                    options.add_argument('--mute-audio')
+                    options.add_argument('--disable-notifications')
+                    options.add_argument('--disable-popup-blocking')
+                    options.add_argument('--hide-scrollbars')
+                    options.add_argument('--disable-logging')
+                    options.add_argument('--log-level=3')
+                    options.add_argument('--no-first-run')
+                    options.add_argument('--no-default-browser-check')
+                    options.add_argument('--no-pings')
+                    options.add_argument('--disable-blink-features=AutomationControlled')
+                    # Disable images for maximum speed
+                    prefs = {
+                        'profile.managed_default_content_settings.images': 2,
+                        'profile.default_content_settings.images': 2
+                    }
+                    options.add_experimental_option('prefs', prefs)
+                    driver = uc.Chrome(options=options, version_main=self._chrome_version)
+                    driver.set_page_load_timeout(60)
+                    logger.info(f"💣 UC Fetching: {url}")
+                    driver.get(url)
+                    # Wait for either content or challenge
+                    time.sleep(10) # Heavy sleep for UC
+                    html = driver.page_source
+                    # Basic sync of UA
+                    ua = driver.execute_script("return navigator.userAgent")
+                    if ua:
+                        self.headers["User-Agent"] = ua
+                    return html
+                except Exception as e:
+                    logger.error(f"Undetected-Chrome failure: {e}")
+                    return None
+                finally:
+                    if driver:
+                        try: driver.quit()
+                        except: pass
+            loop = asyncio.get_event_loop()
+            return await loop.run_in_executor(None, chrome_task)
+    async def _get_html_with_flaresolverr(self, url: str) -> Optional[str]:
+        """FlareSolverr with Singleton Lock to avoid browser bloat"""
+        async with self._solver_lock:
+            # Re-check cache inside lock
+            if url in self._cache:
+                return self._cache[url][1]
+            logger.info(f"✨ Requesting FlareSolverr solve for {url}...")
+            flaresolverr_url = "http://localhost:8191/v1"
+            payload = {
+                "cmd": "request.get",
+                "url": url,
+                "maxTimeout": 60000
+            }
+            # Connection Retry Loop
+            max_conn_retries = 5 # Increased retries
+            for conn_attempt in range(max_conn_retries):
+                try:
+                    async with httpx.AsyncClient(timeout=90.0) as client:
+                        response = await client.post(flaresolverr_url, json=payload)
+                        if response.status_code == 200:
+                            data = response.json()
+                            if data.get('status') == 'ok':
+                                solution = data.get('solution', {})
+                                html = solution.get('response', '')
+                                # SYNCING LOGIC
+                                cookies = solution.get('cookies', [])
+                                ua = solution.get('userAgent', '')
+                                if ua:
+                                    self._ua_synced = ua
+                                    self.headers["User-Agent"] = ua
+                                for cookie in cookies:
+                                    # Ensure domain is set for proper cookie handling
+                                    domain = cookie.get('domain')
+                                    if not domain and url:
+                                        try:
+                                            domain = urlparse(url).netloc
+                                            if domain.startswith('www.'):
+                                                domain = domain[4:]
+                                        except:
+                                            pass
+                                    if domain:
+                                        self.session.cookies.set(
+                                            cookie['name'],
+                                            cookie['value'],
+                                            domain=domain,
+                                            path=cookie.get('path', '/'),
+                                            secure=cookie.get('secure', False),
+                                            expires=cookie.get('expires')
+                                        )
+                                self._cookies_synced = True
+                                self._last_pw_solve = time.time()
+                                logger.info("✅ Session Synced!")
+                                return html
+                            else:
+                                logger.warning(f"FlareSolverr error: {data.get('message')}")
+                        else:
+                            logger.warning(f"FlareSolverr returned status {response.status_code}")
+                except Exception as e:
+                    if conn_attempt < max_conn_retries - 1:
+                        logger.warning(f"FlareSolverr comm failed (attempt {conn_attempt+1}/{max_conn_retries}): {e}. Retrying...")
+                        await asyncio.sleep(2)
+                    else:
+                        logger.error(f"FlareSolverr comm failed after {max_conn_retries} attempts: {e}")
+            return None
+    async def _turbo_prefetch(self):
+        """Pre-fetch all major categories in parallel to populate cache instantly"""
+        if self._is_prefetching: return
+        self._is_prefetching = True
+        logger.info("🚀 NITRO MODE: Starting concurrent background pre-fetch...")
+        try:
+            # List of high-priority tasks
+            tasks = [self.fetch_home(page=1)]
+            # Map of key categories to pre-warm
+            priority_cats = list(self.CATEGORY_KEYWORDS.keys())[:15]
+            for cat_id in priority_cats:
+                tasks.append(self.fetch_category(cat_id, page=1))
+            # Run everything in parallel with semaphore protection
+            await asyncio.gather(*tasks, return_exceptions=True)
+            logger.info(f"⚡ NITRO MODE complete! Cache primed with {len(self._cache)} items.")
+        except Exception as e:
+            logger.error(f"Nitro pre-fetch failed: {e}")
+        finally:
+            self._is_prefetching = False
+    async def _get_html(self, url: str, max_retries: int = 1, follow_meta=True) -> Optional[str]:
+        """Nitro-Speed Fetch with Parallel Safety"""
+        if not self._optimization_started:
+            self._optimization_started = True
+            asyncio.create_task(self._optimize_connection())
+        async with self._semaphore:
+            now = time.time()
+            # 0. Cache Check
+            if url in self._cache:
+                ts, data = self._cache[url]
+                if now - ts < self._cache_ttl:
+                    return data
+            # Sanitize URL - Skip landing pages
+            if any(x in url for x in ["/gaza.20", "/gaza.18", "/gaza.22"]):
+                logger.info(f"Sanitizing landing page URL: {url} -> {self.TARGET_URL}")
+                url = self.TARGET_URL
+            # Refresh free proxies if on cloud platforms
+            if self.IS_HUGGINGFACE or self.IS_RENDER:
+                await self._refresh_free_proxies()
+            proxy = self._get_proxy()
+            proxy_dict = {"http": proxy, "https": proxy} if proxy else None
+            # 1. Nitro Path (curl-cffi)
+            logger.info(f"🚀 Nitro Path (curl-cffi) for {url}")
+            try:
+                # Increased timeout to 45s to handle extremely slow responses
+                resp = await self.session.get(url, headers=self.headers, timeout=45, proxies=proxy_dict)
+                status_code = resp.status_code
+                logger.info(f"📡 Nitro Path response: {status_code} ({len(resp.content)} bytes)")
+                if status_code == 200:
+                    text = resp.text
+                    # Improve Meta Refresh detection (Larooza uses this heavily for domain rotation)
+                    refresh_match = re.search(r'http-equiv=["\']refresh["\'].*?content=["\']\d+;\s*url=(.*?)["\']', text, re.I)
+                    if not refresh_match:
+                        refresh_match = re.search(r'content=["\']\d+;\s*url=(.*?)["\']', text, re.I)
+                    if refresh_match and follow_meta:
+                        new_url_raw = refresh_match.group(1).strip("'\" ")
+                        new_url = urljoin(url, new_url_raw)
+                        # Preserve query parameters if the new URL doesn't have them but the old one did
+                        if "?" not in new_url and "?" in url:
+                            query = url.split("?")[-1]
+                            new_url = f"{new_url}?{query}" if not new_url.endswith("?") else f"{new_url}{query}"
+                        # If redirecting to a known landing page or ad-trap, skip it
+                        if any(x in new_url for x in ["gaza.20", "gaza.18", "gaza.22", "gaza.24"]):
+                            logger.info(f"🚫 Skipping ad-trap redirect: {new_url}")
+                            new_url = self.TARGET_URL
+                        logger.info(f"🔄 Following meta refresh to: {new_url}")
+                        return await self._get_html(new_url, max_retries=max_retries, follow_meta=False)
+                    # More robust Cloudflare & Landing Page detection
+                    text_lower = text.lower()
+                    cf_markers = ["challenge-running", "cf-ray", "cloudflare-static", "just a moment", "verify you are human", "checking your browser"]
+                    is_cf = any(x in text_lower for x in cf_markers) or "id=\"challenge-form\"" in text_lower
+                    # Detect landing page even if 200 OK (gaza.20 redirect in JS or Meta)
+                    is_landing = "gaza.20" in text_lower or "gaza.18" in text_lower or "gaza.22" in text_lower
+                    if is_cf:
+                        logger.warning(f"⚠️ Cloudflare detected in Nitro response for {url}")
+                    elif is_landing and follow_meta:
+                        logger.info(f"🔄 Landing page detected in content for {url}, forcing target...")
+                        return await self._get_html(self.TARGET_URL, max_retries=max_retries, follow_meta=False)
+                    else:
+                        self._cache[url] = (now, text)
+                        return text
+                elif status_code == 404:
+                    logger.warning(f"⚠️ Nitro Path 404 for {url} on mirror {self.BASE_URL}")
+                    # If this was a mirror, fallback to primary domain
+                    primary_primary = self.MIRRORS[0]
+                    if self.BASE_URL != primary_primary:
+                        fallback_url = url.replace(self.BASE_URL, primary_primary)
+                        logger.info(f"🔁 Falling back to primary domain: {fallback_url}")
+                        return await self._get_html(fallback_url, max_retries=max_retries, follow_meta=True)
+                elif status_code == 403:
+                    logger.warning(f"🚫 Nitro Path 403 for {url}, falling back to solvers...")
+            except Exception as e:
+                logger.error(f"❌ Nitro Path error for {url}: {e}")
+            # 2. Solver Path
+            for att in range(max_retries):
+                # Use a specific lock for solver to prevent multiple concurrent solver requests for the same URL
+                # but allow different URLs in parallel. For simplicity, we use the existing semaphore and a small delay.
+                # Check cache again just in case another task filled it
+                if url in self._cache:
+                    return self._cache[url][1]
+                html = await self._get_html_with_flaresolverr(url)
+                if html:
+                    self._cache[url] = (now, html)
+                    return html
+                # UC Fallback for critical pages
+                if att == max_retries - 1:
+                    logger.info(f"UC Fallback for: {url}")
+                    res = await self._get_html_with_undetected_chrome(url)
+                    if res: return res
+            return None
+    def _extract_items(self, soup: BeautifulSoup) -> List[Dict]:
+        """Ultra-Fast Content Extraction with Deep Image Probing"""
+        items = []
+        if not soup: return []
+        if soup.title:
+            logger.info(f"Extracting: {soup.title.string}")
+            if "challenge" in str(soup.title).lower() or "cloudflare" in str(soup.title).lower():
+                return []
+        # Ultra-Strong Coverage for all Larooza Variants & Mirrors
+        containers = soup.select('.thumbnail, .pm-li-video, .pm-video-thumb, .video-block, .movie-item, li.col-xs-6, .box, .video-box, .video-item, .post-item')
+        if not containers:
+            # Deep scan for any link that looks like a video
+            containers = soup.select('a[href*="video.php"], a[href*="watch.php"], .video-listing-content, .card-video')
+        seen_urls = set()
+        for tag in containers:
+            # 1. Fast Link Detection
+            link = tag if (tag.name == 'a' and 'video.php' in tag.get('href', '')) else \
+                   (tag.select_one('a.ellipsis') or tag.find('a', href=lambda x: x and 'video.php' in x))
+            if not link: continue
+            href = link.get('href')
+            if not href: continue
+            full_link = urljoin(self.BASE_URL, href)
+            if full_link in seen_urls: continue
+            seen_urls.add(full_link)
+            # 2. Extract Title & Clean it
+            title_node = tag.select_one('h3, h2, .title, .ellipsis, .video-title, p')
+            title = title_node.get_text(strip=True) if title_node else ""
+            if not title and link:
+                title = link.get('title') or link.get_text(strip=True)
+            # Clean Title (Remove noisy tags for premium look)
+            for t_tag in ["مشاهدة", "فيلم", "مسلسل", "كامل", "HDCAM", "HD", "WEB-DL", "Cam", "مترجم", "اون لاين", "مدبلج"]:
+                title = title.replace(t_tag, "").strip()
+            title = re.sub(r'\d{4}', '', title).strip("- ").strip() # Remove Year
+            # 3. Deep Image Probing
+            img_node = tag.select_one('img')
+            img_url = ""
+            if img_node:
+                # Try all possible lazy-load attributes, prefer potential real URLs over base64
+                candidates = [
+                    img_node.get('data-src'),
+                    img_node.get('data-lazy-src'),
+                    img_node.get('data-original'),
+                    img_node.get('srcset'),
+                    img_node.get('src')
+                ]
+                for c in candidates:
+                    if c and not c.startswith('data:'):
+                        # Ensure it's a real URL
+                        if c.startswith('http') or c.startswith('//') or c.startswith('/'):
+                            img_url = c
+                            break
+                # If still no image, try to find ANY attribute that looks like a URL
+                if not img_url:
+                    for attr, val in img_node.attrs.items():
+                        if isinstance(val, str) and (val.startswith('http') or '.jpg' in val or '.png' in val) and not val.startswith('data:'):
+                            img_url = val
+                            break
+                if img_url and "," in img_url: # Handle srcset
+                    img_url = img_url.split(",")[0].split(" ")[0]
+            # Fallback: Check for background-image in style
+            if not img_url:
+                style = tag.get('style') or ""
+                if 'background-image' in style:
+                    m = re.search(r'url\([\'"]?(.*?)[\'"]?\)', style)
+                    if m:
+                        img_url = m.group(1)
+            if not img_url or img_url.startswith('data:'):
+                img_url = "https://placehold.co/600x400/000000/FFFFFF?text=No+Poster"
+            # Absolute URL correction
+            if img_url.startswith('//'): img_url = 'https:' + img_url
+            elif img_url.startswith('/'): img_url = self.BASE_URL + img_url
+            # Proxy through our backend for stability
+            poster = f"/proxy/image?url={quote(img_url)}"
+            # 4. Speed-optimized Series Detection
+            lt = title.lower()
+            content_type = "series" if any(x in lt for x in ['حلقة', 'مسلسل', 'episode', 'season', 'series']) else "movie"
+            items.append({
+                "id": base64.urlsafe_b64encode(full_link.encode()).decode(),
+                "title": title,
+                "poster": poster,
+                "type": content_type,
+                "duration": tag.select_one('.duration, .pm-label-duration, .time').get_text(strip=True) if tag.select_one('.duration, .pm-label-duration, .time') else ""
+            })
+        return items
+    async def fetch_home(self, page: int = 1) -> List[Dict]:
+        target = f"{self.TARGET_URL}?page={page}"
+        html = await self._get_html(target, max_retries=3)
+        if not html:
+            logger.error(f"Failed to fetch home page: {target}")
+            return []
+        items = self._extract_items(BeautifulSoup(html, 'html.parser'))
+        logger.info(f"Fetched {len(items)} items from {target}")
+        return items
+    async def fetch_category(self, cat_id: str, page: int = 1) -> List[Dict]:
+        resolved_id = await self._resolve_cat_id(cat_id)
+        target = f"{self.BASE_URL}/category.php?cat={resolved_id}&page={page}"
+        html = await self._get_html(target, max_retries=3)
+        return self._extract_items(BeautifulSoup(html, 'html.parser')) if html else []
+    def _normalize_number(self, text: str) -> int:
+        """Extract episode number from Arabic/English text"""
+        # Arabic number words mapping
+        arabic_map = {
+            'الأولى': 1, 'الاولى': 1, 'الثانية': 2, 'الثالثة': 3, 'الرابعة': 4,
+            'الخامسة': 5, 'السادسة': 6, 'السابعة': 7, 'الثامنة': 8, 'التاسعة': 9,
+            'العاشرة': 10, 'الحادية': 11, 'الثانية عشر': 12, 'الثالثة عشر': 13,
+            'الرابعة عشر': 14, 'الخامسة عشر': 15, 'السادسة عشر': 16, 'السابعة عشر': 17,
+            'الثامنة عشر': 18, 'التاسعة عشر': 19, 'العشرون': 20, 'الاخيرة': 999
+        }
+        # Try to find numeric digits first (most reliable)
+        match = re.search(r'(\d+)', text)
+        if match:
+            return int(match.group(1))
+        # Try Arabic number words
+        text_lower = text.lower()
+        for arabic_word, num in arabic_map.items():
+            if arabic_word in text_lower:
+                return num
+        # Try to extract from patterns like "الحلقة X" or "Episode X"
+        patterns = [
+            r'(?:الحلقة|حلقة|episode|ep)\s*[:\-]?\s*(\d+)',
+            r'(\d+)\s*(?:الحلقة|حلقة|episode|ep)',
+        ]
+        for pattern in patterns:
+            match = re.search(pattern, text_lower)
+            if match:
+                return int(match.group(1))
+        return 0
+    def _safe_get_episode(self, text: str, name_hint: str = None) -> int:
+        """Smarter episode number extraction with common patterns"""
+        # Remove common noise
+        clean = re.sub(r'\(.*?\)', '', text)
+        clean = re.sub(r'\[.*?\]', '', clean)
+        if name_hint:
+            # Remove the series name from the text to avoid matching numbers in the title (e.g. "2 قهوة")
+            clean = clean.replace(name_hint, "").strip()
+        # 1. Look for number after keywords (Most reliable)
+        m = re.search(r'(?:الحلقة|حلقة|ep|episode|part|p)\s*(\d+)', clean, re.I)
+        if m: return int(m.group(1))
+        # 2. Direct digits (Fallback)
+        m = re.search(r'(\d+)', clean)
+        if m: return int(m.group(1))
+        # 3. Word matches
+        return self._normalize_number(clean)
+    async def search(self, query: str) -> List[Dict]:
+        url = f"{self.BASE_URL}/search.php?keywords={quote(query)}"
+        html = await self._get_html(url, max_retries=2)
+        return self._extract_items(BeautifulSoup(html, 'html.parser')) if html else []
+    async def fetch_details(self, safe_id: str) -> Dict:
+        try:
+            url = base64.urlsafe_b64decode(safe_id).decode()
+        except: return {}
+        html = await self._get_html(url)
+        if not html: return {}
+        soup = BeautifulSoup(html, 'html.parser')
+        # Follow play.php for watch servers
+        watch_html = html
+        watch_soup = soup
+        play_a = soup.select_one('a[href*="play.php"]')
+        if play_a:
+            p_url = urljoin(self.BASE_URL, play_a.get('href'))
+            p_html = await self._get_html(p_url)
+            if p_html:
+                watch_soup = BeautifulSoup(p_html, 'html.parser')
+                watch_html = p_html
+        title = soup.find('h1').get_text(strip=True) if soup.find('h1') else "Unknown"
+        is_series = bool(soup.select('.episodes-list, .season-episodes, .vid-episodes')) or any(x in title for x in ["حلقة", "مسلسل", "الموسم"])
+        raw_poster = soup.select_one('meta[property="og:image"]')['content'] if soup.select_one('meta[property="og:image"]') else ""
+        if not raw_poster:
+            img_tag = soup.select_one('.poster img, .movie-poster img, .pm-video-watch-main img')
+            if img_tag:
+                raw_poster = img_tag.get('src') or img_tag.get('data-src')
+        poster = ""
+        if raw_poster:
+            full_poster_url = urljoin(self.BASE_URL, raw_poster)
+            poster = f"/proxy/image?url={quote(full_poster_url)}"
+        response = {
+            "id": safe_id, "title": title,
+            "description": soup.select_one('.story, .desc, .entry-content').get_text(strip=True) if soup.select_one('.story, .desc, .entry-content') else "",
+            "poster": poster,
+            "type": "series" if is_series else "movie",
+            "seasons": [], "episodes": [], "servers": [], "download_links": []
+        }
+        # --- Episodes ---
+        if is_series:
+            unique_eps = {}
+            # 1. Proactive Search: Look for a "Series Category" link
+            cat_link = None
+            # A. Check Breadcrumbs (Very reliable for series category)
+            for bc in soup.select('.breadcrumb a, .bread-crumb a, .breadcrumbs a, .pm-breadcrumb a'):
+                href = bc.get('href')
+                if href and ('cat=' in href or 'ser=' in href):
+                    # Skip generic high-level categories if possible?
+                    # Actually, we filter by title later, so it's okay.
+                    cat_link = urljoin(self.BASE_URL, href)
+                    if 'ser=' in href: # Prefer ser= over cat=
+                        break
+            # Extract clean series name for filtering
+            clean_title = title.replace("مسلسل", "").strip()
+            # Try to get name before "الحلقة" or "المواسم"
+            series_name = re.split(r'الحلقة|الموسم|حلقة|season|episode', clean_title, flags=re.I)[0].strip()
+            # Arabic numeral support for filtering
+            series_name_alt = series_name.replace('0','٠').replace('1','١').replace('2','٢').replace('3','٣').replace('4','٤').replace('5','٥').replace('6','٦').replace('7','٧').replace('8','٨').replace('9','٩')
+            logger.info(f"Targeting series name: {series_name} (Alt: {series_name_alt})")
+            # B. Check if Title itself is a link to the category or series
+            if not cat_link:
+                title_link = soup.select_one('h1 a[href*="cat="], h1 a[href*="ser="], h1 a[href*="tag.php"]')
+                if title_link:
+                    cat_link = urljoin(self.BASE_URL, title_link['href'])
+            # C. General search in links with strict patterns
+            if not cat_link:
+                for a in soup.find_all('a', href=True):
+                    href = a['href']
+                    a_text = a.get_text(strip=True)
+                    # High-confidence patterns
+                    if any(x in a_text for x in ["المسلسل:", "جميع الحلقات", "حلقات المسلسل", "كل الحلقات"]):
+                        cat_link = urljoin(self.BASE_URL, href)
+                        logger.info(f"Found cat_link via labels: {cat_link}")
+                        break
+            # D. Fallback search by title
+            if not cat_link:
+                for a in soup.find_all('a', href=True):
+                    href = a['href']
+                    if any(x in href for x in ['ser=', 'cat=', 'tag.php']):
+                        a_text = a.get_text(strip=True)
+                        if (series_name and series_name in a_text) or (series_name_alt and series_name_alt in a_text):
+                            cat_link = urljoin(self.BASE_URL, href)
+                            logger.info(f"Found cat_link via fallback title search: {cat_link}")
+                            break
+            if cat_link:
+                try:
+                    # Determine type: view-serie.php, category.php, tag.php
+                    is_view_serie = 'view-serie' in cat_link
+                    param_name = 'ser' if is_view_serie else ('t' if 'tag.php' in cat_link else 'cat')
+                    # Robust ID extraction
+                    match = re.search(f'[?&]{param_name}=([^&]+)', cat_link)
+                    if match:
+                        cat_id = match.group(1)
+                        base_deep_url = f"{self.BASE_URL}/tag.php?t={cat_id}" if param_name == 't' else \
+                                        (f"{self.BASE_URL}/view-serie.php?ser={cat_id}" if is_view_serie else \
+                                         f"{self.BASE_URL}/category.php?cat={cat_id}")
+                        logger.info(f"Deep scraping episodes from {cat_link} (ID: {cat_id})")
+                        # Fetch first 5 pages
+                        for p in range(1, 6):
+                            target_p = f"{base_deep_url}&page={p}" if p > 1 else base_deep_url
+                            p_html = await self._get_html(target_p)
+                            if not p_html: break
+                            p_items = self._extract_items(BeautifulSoup(p_html, 'html.parser'))
+                            if not p_items: break
+                            for item in p_items:
+                                # Filter Check: Use a fuzzy name match
+                                i_title = item['title']
+                                # Must match at least the first 2 words if possible, or the whole name
+                                name_parts = series_name.split()
+                                match_key = " ".join(name_parts[:2]) if len(name_parts) >= 2 else series_name
+                                if match_key in i_title or series_name in i_title or series_name_alt in i_title:
+                                    e_num = self._safe_get_episode(i_title, name_hint=series_name)
+                                    if e_num and e_num not in unique_eps:
+                                        unique_eps[e_num] = {
+                                            "id": item['id'],
+                                            "episode": e_num,
+                                            "title": i_title
+                                        }
+                            if len(p_items) < 10: break
+                except Exception as e:
+                    logger.error(f"Category episode fetch failed: {e}")
+            # 2. Local fallback: Scrape episodes from the current page
+            for ep in soup.select('.episodes-list a, .season-episodes a, .vid-episodes a, ul.episodes li a, div.caption h3 a, .movie-item a, .related-vids a'):
+                ep_href = ep.get('href')
+                if not ep_href or 'video.php' not in ep_href: continue
+                ep_url = urljoin(self.BASE_URL, ep_href)
+                ep_text = ep.get_text(strip=True)
+                # If text is empty, check for nested title
+                if not ep_text:
+                    inner = ep.find(['h3', 'span', 'strong'])
+                    if inner: ep_text = inner.get_text(strip=True)
+                # CRITICAL FILTER: Item must belong to this series
+                if series_name and series_name not in ep_text:
+                    continue
+                ep_num = self._safe_get_episode(ep_text, name_hint=series_name)
+                if ep_num and ep_num not in unique_eps:
+                    unique_eps[ep_num] = {
+                        "id": base64.urlsafe_b64encode(ep_url.encode()).decode(),
+                        "episode": ep_num,
+                        "title": ep_text
+                    }
+            response['episodes'] = sorted(list(unique_eps.values()), key=lambda x: x['episode'])
+            response['seasons'] = [{"number": 1, "episodes": response['episodes']}]
+        # --- WATCH SERVERS ---
+        watch_urls = set()
+        def is_valid_srv(url_str: str) -> bool:
+            if not url_str or 'javascript' in url_str: return False
+            if 'larooza' in url_str and 'video.php' in url_str: return False
+            if any(x in url_str.lower() for x in ['beacon', 'analytics', 'pixel', 'ads.', 'google', 'facebook']): return False
+            return True
+        # 1. Primary: WatchList & Source tags
+        server_selectors = [
+            'ul.WatchList li', '.server-list li', '#servers li', '.watch-servers li',
+            '.video-servers-list li', 'div.servers a', '.player-servers li'
+        ]
+        for sel in server_selectors:
+            for li in watch_soup.select(sel):
+                s_url = li.get('data-embed-url') or li.get('data-link') or li.get('data-embed') or li.get('data-src') or li.get('data-url')
+                if not s_url:
+                    a_tag = li.find('a', href=True)
+                    if a_tag and not a_tag['href'].startswith('javascript'):
+                        s_url = a_tag['href']
+                if s_url and is_valid_srv(s_url):
+                    if s_url.startswith('//'): s_url = "https:" + s_url
+                    full_s_url = urljoin(self.BASE_URL, s_url)
+                    if full_s_url not in watch_urls:
+                        watch_urls.add(full_s_url)
+                        name = li.get_text(strip=True) or f"سيرفر {len(response['servers']) + 1}"
+                        response['servers'].append({"name": name, "url": full_s_url, "type": "iframe"})
+        # 2. Secondary: Deep Iframe Scan
+        for ifr in watch_soup.select('iframe[src], embed[src], video source[src]'):
+            src = ifr.get('src')
+            if is_valid_srv(src):
+                if src.startswith('//'): src = "https:" + src
+                full_s_url = urljoin(self.BASE_URL, src)
+                if full_s_url not in watch_urls:
+                    watch_urls.add(full_s_url)
+                    response['servers'].append({"name": f"سيرفر سريع {len(response['servers']) + 1}", "url": full_s_url, "type": "iframe"})
+        # 3. Regex Fallback (Scripts & Global)
+        patterns = [
+            r'iframe.*?src=["\'](https?://[^"\']+)["\']',
+            r'embedUrl["\']\s*:\s*["\'](https?://[^"\']+)["\']',
+            r'file["\']\s*:\s*["\'](https?://[^"\']+\.m3u8)["\']',
+            r'source\s*src=["\'](https?://[^"\']+)["\']'
+        ]
+        for pattern in patterns:
+            for match in re.findall(pattern, watch_html, re.I):
+                if is_valid_srv(match) and match not in watch_urls:
+                    watch_urls.add(match)
+                    response['servers'].append({"name": f"سيرفر احتياطي {len(response['servers']) + 1}", "url": match, "type": "iframe"})
+        # Clean duplicates and sort by quality/relevance if possible
+        # For now, just ensuring uniqueness
+        # --- Downloads ---
+        dl_url = url.replace('video.php', 'download.php').replace('play.php', 'download.php')
+        dl_html = await self._get_html(dl_url)
+        if dl_html:
+            dl_soup = BeautifulSoup(dl_html, 'html.parser')
+            for mirror in dl_soup.select('a[target="_blank"]'):
+                m_url = mirror.get('href')
+                if m_url and 'http' in m_url:
+                    if any(x in m_url.lower() for x in ['wa.me', 'facebook.com', 'twitter.com', 'telegram.me', 't.me', 'sharer.php']):
+                        continue
+                    q_text = mirror.get_text(strip=True).replace("اضغط هنا للتحميل", "").replace("تحميل الملف", "").strip() or "رابط تحميل"
+                    response['download_links'].append({"quality": q_text, "url": m_url})
+        return response
+# Module-level scraper instance, created once when this module is imported.
+scraper = LaroozaScraper()

scraper/proxy_fetcher.py ADDED Viewed

	@@ -0,0 +1,66 @@

+"""
+Free Proxy Fetcher - Automatically fetches and validates free proxies
+"""
+import aiohttp
+import asyncio
+import logging
+logger = logging.getLogger("proxy_fetcher")
+class FreeProxyFetcher:
+    def __init__(self):
+        self.proxies = []
+        self.last_fetch = 0
+    async def fetch_free_proxies(self):
+        """Fetch free proxies from public APIs"""
+        proxy_sources = [
+            "https://api.proxyscrape.com/v2/?request=get&protocol=http&timeout=10000&country=all&ssl=all&anonymity=all",
+            "https://www.proxy-list.download/api/v1/get?type=http",
+        ]
+        all_proxies = []
+        async with aiohttp.ClientSession() as session:
+            for source in proxy_sources:
+                try:
+                    async with session.get(source, timeout=10) as resp:
+                        if resp.status == 200:
+                            text = await resp.text()
+                            proxies = [f"http://{line.strip()}" for line in text.split('\n') if line.strip()]
+                            all_proxies.extend(proxies[:20])  # Take first 20 from each source
+                            logger.info(f"Fetched {len(proxies)} proxies from {source}")
+                except Exception as e:
+                    logger.error(f"Failed to fetch from {source}: {e}")
+        self.proxies = all_proxies
+        logger.info(f"Total free proxies loaded: {len(self.proxies)}")
+        return self.proxies
+    async def validate_proxy(self, proxy, test_url="https://httpbin.org/ip"):
+        """Test if a proxy works"""
+        try:
+            async with aiohttp.ClientSession() as session:
+                async with session.get(test_url, proxy=proxy, timeout=5) as resp:
+                    if resp.status == 200:
+                        return True
+        except:
+            pass
+        return False
+    async def get_working_proxies(self, max_count=10):
+        """Get validated working proxies"""
+        if not self.proxies:
+            await self.fetch_free_proxies()
+        working = []
+        tasks = [self.validate_proxy(p) for p in self.proxies[:30]]
+        results = await asyncio.gather(*tasks, return_exceptions=True)
+        for proxy, is_working in zip(self.proxies[:30], results):
+            if is_working and len(working) < max_count:
+                working.append(proxy)
+        logger.info(f"Validated {len(working)} working proxies")
+        return working
+# Module-level fetcher instance, created once when this module is imported.
+proxy_fetcher = FreeProxyFetcher()

start.sh ADDED Viewed

	@@ -0,0 +1,31 @@

+#!/bin/bash
+# Boot script: start FlareSolverr in the background, wait for its /health endpoint,
+# then run the FastAPI app with uvicorn in the foreground.
+set -e
+echo "--- STARTING MULTI-SERVICE BOOT ---"
+# Step 1: Start FlareSolverr
+echo "[1/3] Launching FlareSolverr in background..."
+export PYTHONPATH=$PYTHONPATH:/app/flaresolverr
+export PORT=8191
+export LOG_LEVEL=info
+# Run FlareSolverr with its own directory as CWD
+(cd /app/flaresolverr && python3 flaresolverr.py) &
+# Step 2: Health Check for FlareSolverr
+echo "[2/3] Waiting for FlareSolverr to bind to port 8191..."
+MAX_RETRIES=30
+COUNT=0
+FS_READY=1
+# -f: treat HTTP error statuses as "not ready"; --max-time: never hang on a half-open socket
+while ! curl -sf --max-time 2 http://localhost:8191/health > /dev/null; do
+    sleep 1
+    COUNT=$((COUNT+1))
+    if [ "$COUNT" -ge "$MAX_RETRIES" ]; then
+        FS_READY=0
+        echo "⚠️ FlareSolverr failed to start in time, continuing to FastAPI anyway..."
+        break
+    fi
+done
+# Only report success when the health check actually passed
+if [ "$FS_READY" -eq 1 ]; then
+    echo "✅ FlareSolverr is ready!"
+fi
+# Step 3: Start FastAPI
+echo "[3/3] Launching FastAPI on port 7860..."
+uvicorn main:app --host 0.0.0.0 --port 7860 --log-level info

start_render.sh ADDED Viewed

	@@ -0,0 +1,22 @@

+#!/bin/bash
+# Render.com boot script: start FlareSolverr in the background, wait for it,
+# then run the FastAPI app with uvicorn on the port Render assigns.
+set -e
+echo "--- RENDER.COM DEPLOYMENT ---"
+# Step 1: Start FlareSolverr
+echo "[1/2] Launching FlareSolverr in background..."
+export PYTHONPATH=$PYTHONPATH:/opt/render/project/src/flaresolverr
+export PORT_FS=8191
+export LOG_LEVEL=info
+# start.sh configures FlareSolverr's port through PORT. Render sets PORT for the web service,
+# so override it for this subshell only; otherwise FlareSolverr would inherit Render's port
+# and compete with uvicorn for it. PORT_FS stays exported for anything that reads it.
+(cd /opt/render/project/src/flaresolverr && PORT=8191 python3 flaresolverr.py) &
+# Wait for FlareSolverr: poll /health instead of sleeping a fixed time
+echo "[2/2] Waiting for FlareSolverr..."
+fs_healthy() {
+    # python3 is always present here; succeeds only on a 2xx answer
+    python3 -c "import urllib.request; urllib.request.urlopen('http://localhost:8191/health', timeout=2)" > /dev/null 2>&1
+}
+MAX_RETRIES=30
+COUNT=0
+FS_READY=1
+while ! fs_healthy; do
+    sleep 1
+    COUNT=$((COUNT+1))
+    if [ "$COUNT" -ge "$MAX_RETRIES" ]; then
+        FS_READY=0
+        echo "⚠️ FlareSolverr failed to start in time, continuing to FastAPI anyway..."
+        break
+    fi
+done
+if [ "$FS_READY" -eq 1 ]; then
+    echo "✅ FlareSolverr ready!"
+fi
+echo "--- Starting FastAPI on port $PORT ---"
+# Render provides $PORT automatically
+uvicorn main:app --host 0.0.0.0 --port ${PORT:-7860} --log-level info

tools/analyze_structure.py ADDED Viewed

	@@ -0,0 +1,36 @@

+from bs4 import BeautifulSoup
+import sys
+import io
+# Set encoding for Windows terminal
+if sys.platform == 'win32':
+    sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
+def analyze_html():
+    with open("flaresolverr_output.html", "r", encoding="utf-8") as f:
+        html = f.read()
+    soup = BeautifulSoup(html, 'html.parser')
+    print("--- Analyzing Links ---")
+    links = soup.find_all('a', href=True)
+    for i, a in enumerate(links[:100]):
+        href = a['href']
+        text = a.get_text(strip=True)
+        if 'cat=' in href or 'video' in href or 'movie' in href or 'series' in href:
+            print(f"{i}: Text: {text} | Href: {href}")
+    print("\n--- Analyzing Containers ---")
+    # Look for common patterns in classes
+    classes = set()
+    for tag in soup.find_all(True, class_=True):
+        for c in tag['class']:
+            classes.add(c)
+    print(f"Found {len(classes)} unique classes.")
+    # Print classes that might be containers
+    potential = [c for c in classes if any(x in c.lower() for x in ['item', 'video', 'movie', 'thumb', 'card', 'block', 'col'])]
+    print(f"Potential container classes: {potential}")
+if __name__ == "__main__":
+    analyze_html()

tools/check_mirrors.py ADDED Viewed

	@@ -0,0 +1,34 @@

+import asyncio
+import httpx
+from curl_cffi.requests import AsyncSession
+async def check_mirrors():
+    mirrors = [
+        "https://larooza.mom",
+        "https://larooza.site",
+        "https://laroza-tv.net",
+        "https://larozavideo.net",
+        "https://larooza.video",
+        "https://q.larozavideo.net"
+    ]
+    headers = {
+        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
+    }
+    for mirror in mirrors:
+        print(f"Checking {mirror}...")
+        try:
+            # Try curl-cffi first
+            async with AsyncSession(impersonate="chrome110") as s:
+                resp = await s.get(mirror, headers=headers, timeout=10)
+                print(f"  [curl-cffi] {mirror}: {resp.status_code} | Title: {resp.text[:100].replace('\n', ' ')}")
+            async with httpx.AsyncClient(http2=True, timeout=10) as client:
+                resp = await client.get(mirror, headers=headers)
+                print(f"  [httpx] {mirror}: {resp.status_code} | Title: {resp.text[:100].replace('\n', ' ')}")
+        except Exception as e:
+            print(f"  [Error] {mirror}: {e}")
+if __name__ == "__main__":
+    asyncio.run(check_mirrors())

tools/debug_fs.py ADDED Viewed

	@@ -0,0 +1,51 @@

+import asyncio
+import httpx
+import json
+import sys
+# Set encoding to utf-8 for windows console
+if sys.platform == "win32":
+    import codecs
+    sys.stdout = codecs.getwriter("utf-8")(sys.stdout.detach())
+async def test():
+    urls = [
+        "https://q.larozavideo.net/home.24",
+        "https://q.larozavideo.net/newvideos1.php",
+        "https://q.larozavideo.net/category.php?cat=all_movies_13"
+    ]
+    flaresolverr_url = "http://127.0.0.1:8191/v1"
+    async with httpx.AsyncClient(timeout=90.0) as client:
+        for url in urls:
+            print(f"\n--- Testing {url} ---")
+            payload = {
+                "cmd": "request.get",
+                "url": url,
+                "maxTimeout": 60000
+            }
+            try:
+                response = await client.post(flaresolverr_url, json=payload)
+                if response.status_code == 200:
+                    data = response.json()
+                    if data.get('status') == 'ok':
+                        solution = data.get('solution', {})
+                        html = solution.get('response', '')
+                        title = solution.get('title', '')
+                        print(f"Title found: {title}")
+                        if "video.php" in html or ".thumbnail" in html or ".box" in html:
+                            print("FOUND: Movie items are present in HTML!")
+                        else:
+                            print("FAILED: No movie items in HTML.")
+                            print(f"Snippet: {html[:500]}")
+                    else:
+                        print(f"FlareSolverr message: {data.get('message')}")
+                else:
+                    print(f"Server error: {response.status_code}")
+            except Exception as e:
+                print(f"Script error: {e}")
+if __name__ == "__main__":
+    asyncio.run(test())

tools/debug_mirrors.py ADDED Viewed

	@@ -0,0 +1,35 @@

+import asyncio
+import httpx
+from bs4 import BeautifulSoup
+async def debug_fetch():
+    mirrors = ["https://q.larozavideo.net", "https://larooza.mom", "https://larooza.site", "https://m.laroza-tv.net"]
+    async with httpx.AsyncClient(timeout=10.0, follow_redirects=True) as client:
+        for mirror in mirrors:
+            print(f"\n--- Checking mirror: {mirror} ---")
+            try:
+                resp = await client.get(mirror, headers={"User-Agent": "Mozilla/5.0"})
+                print(f"Status: {resp.status_code}")
+                if resp.status_code == 200:
+                    soup = BeautifulSoup(resp.text, 'html.parser')
+                    title = soup.title.string if soup.title else "No title"
+                    print(f"Title: {title}")
+                    selectors = ['.thumbnail', '.pm-li-video', '.pm-video-thumb', '.video-block', '.movie-item', 'li.col-xs-6', '.box', '.video-box', '.video-item', '.post-item']
+                    found = False
+                    for sel in selectors:
+                        count = len(soup.select(sel))
+                        if count > 0:
+                            print(f"  Found {count} items with selector {sel}")
+                            found = True
+                    if not found:
+                        video_links = len(soup.select('a[href*="video.php"], a[href*="watch.php"]'))
+                        print(f"  Found {video_links} video/watch links.")
+                else:
+                    print(f"  Snippet: {resp.text[:200]}")
+            except Exception as e:
+                print(f"  Error: {e}")
+if __name__ == "__main__":
+    asyncio.run(debug_fetch())

tools/debug_scraper.py ADDED Viewed

	@@ -0,0 +1,27 @@

+import asyncio
+import sys
+import os
+# Add the current directory to path
+sys.path.append(os.getcwd())
+from scraper.engine import LaroozaScraper
+# Set encoding to utf-8 for windows console
+if sys.platform == "win32":
+    import codecs
+    sys.stdout = codecs.getwriter("utf-8")(sys.stdout.detach())
+async def test():
+    scraper = LaroozaScraper()
+    print("DEBUG: Fetching latest movies...")
+    items = await scraper.fetch_home(page=1)
+    print(f"DEBUG: Found {len(items)} items.")
+    if items:
+        for i, item in enumerate(items[:3]):
+            print(f"  {i+1}. {item['title']} - ID: {item['id'][:20]}...")
+    else:
+        print("DEBUG: ❌ fetch_home returned 0 items.")
+if __name__ == "__main__":
+    asyncio.run(test())

tools/dump_html.py ADDED Viewed

	@@ -0,0 +1,25 @@

+import httpx
+import asyncio
+from bs4 import BeautifulSoup
+async def dump_html():
+    url = "https://larooza.mom" # Using the one that gave 0 links
+    headers = {
+        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
+    }
+    async with httpx.AsyncClient(timeout=20, follow_redirects=True) as client:
+        print(f"Fetching {url}...")
+        resp = await client.get(url, headers=headers)
+        print(f"Status: {resp.status_code}")
+        with open("dump.html", "w", encoding="utf-8") as f:
+            f.write(resp.text)
+        print("HTML dumped to dump.html")
+        soup = BeautifulSoup(resp.text, 'html.parser')
+        links = soup.select('a')
+        print(f"Total links: {len(links)}")
+        for a in links[:20]:
+            print(f"Link: {a.get('href')} | Text: {a.get_text(strip=True)[:30]}")
+if __name__ == "__main__":
+    asyncio.run(dump_html())

tools/dump_html_v2.py ADDED Viewed

	@@ -0,0 +1,25 @@

+import httpx
+import asyncio
+from bs4 import BeautifulSoup
+async def dump_html():
+    url = "https://q.larozavideo.net/newvideos1.php"
+    headers = {
+        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
+    }
+    async with httpx.AsyncClient(timeout=20, follow_redirects=True) as client:
+        print(f"Fetching {url}...")
+        resp = await client.get(url, headers=headers)
+        print(f"Status: {resp.status_code}")
+        print(f"Final URL: {resp.url}")
+        soup = BeautifulSoup(resp.text, 'html.parser')
+        containers = soup.select('.thumbnail, .pm-li-video, .pm-video-thumb, .video-block, .movie-item, li.col-xs-6, .box, .video-box, .video-item, .post-item')
+        print(f"Found {len(containers)} item containers.")
+        if len(containers) == 0:
+            print("Snippet of HTML:")
+            print(resp.text[:1000])
+if __name__ == "__main__":
+    asyncio.run(dump_html())

tools/extra/diagnose.py ADDED Viewed

	@@ -0,0 +1,27 @@

+import httpx
+import asyncio
+import os
+async def check_service(name, url):
+    try:
+        async with httpx.AsyncClient(timeout=5.0) as client:
+            resp = await client.get(url)
+            print(f"✅ {name} is UP ({url}) - Status: {resp.status_code}")
+            return True
+    except Exception as e:
+        print(f"❌ {name} is DOWN ({url}) - Error: {e}")
+        return False
+async def main():
+    print("--- Diagnostics ---")
+    await check_service("Backend", "http://localhost:8000/health")
+    await check_service("FlareSolverr", "http://localhost:8191/health")
+    # Try to find the tunnel URL from local logs if possible
+    print("\n--- Searching for Tunnel URL ---")
+    # This is a bit tricky, but we can try to find recent cloudflared logs
+    # Cloudflared usually doesn't log to a file unless specified, but we'll check common names
+if __name__ == "__main__":
+    asyncio.run(main())

tools/extra/expose_to_internet.bat ADDED Viewed

	@@ -0,0 +1,18 @@

+@echo off
+REM Expose the local backend on port 8000 through a Cloudflare tunnel.
+echo ==========================================
+echo CLOUDFLARE TUNNEL - EXPOSE TO INTERNET
+echo ==========================================
+REM Download Cloudflared (if not exists)
+if not exist cloudflared.exe (
+    echo Downloading Cloudflare Tunnel...
+    powershell -Command "Invoke-WebRequest -Uri 'https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-windows-amd64.exe' -OutFile 'cloudflared.exe'"
+)
+REM Stop here if the download failed instead of trying to run a missing executable
+if not exist cloudflared.exe (
+    echo ERROR: cloudflared.exe could not be downloaded. Check your internet connection and try again.
+    pause
+    exit /b 1
+)
+REM Start tunnel
+echo Starting Cloudflare Tunnel...
+echo Your backend will be accessible via a public URL in a moment...
+echo.
+cloudflared.exe tunnel --url http://localhost:8000
+pause