Adarshu07 commited on
Commit
f9c817e
Β·
verified Β·
1 Parent(s): 6c2047b

Upload 4 files

Browse files
Files changed (4) hide show
  1. Dockerfile +102 -0
  2. cloudflare_provider.py +1114 -0
  3. requirements.txt +11 -0
  4. server.py +634 -0
Dockerfile ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# ╔══════════════════════════════════════════════════════════════╗
# ║ Dockerfile — Cloudflare AI API                                ║
# ║                                                               ║
# ║ Stack: Python 3.11 · FastAPI · Chrome · Xvfb                  ║
# ║ Port: 7860 (HuggingFace Spaces default)                       ║
# ║                                                               ║
# ║ NOTE: We use Xvfb (virtual framebuffer) instead of            ║
# ║ Chrome --headless because Cloudflare blocks                   ║
# ║ headless user agents at the WebSocket level.                  ║
# ╚══════════════════════════════════════════════════════════════╝

FROM python:3.11-slim

# ── System deps & Xvfb ────────────────────────────────────────
RUN apt-get update && apt-get install -y --no-install-recommends \
    # Xvfb virtual framebuffer
    xvfb \
    # Chrome runtime deps
    wget \
    gnupg \
    ca-certificates \
    libx11-6 \
    libx11-xcb1 \
    libxcb1 \
    libxcomposite1 \
    libxcursor1 \
    libxdamage1 \
    libxext6 \
    libxfixes3 \
    libxi6 \
    libxrandr2 \
    libxrender1 \
    libxss1 \
    libxtst6 \
    libglib2.0-0 \
    libgtk-3-0 \
    libnspr4 \
    libnss3 \
    libatk1.0-0 \
    libatk-bridge2.0-0 \
    libdrm2 \
    libgbm1 \
    libcups2 \
    libasound2 \
    libpango-1.0-0 \
    libpangocairo-1.0-0 \
    fonts-liberation \
    libappindicator3-1 \
    xdg-utils \
    lsb-release \
    && rm -rf /var/lib/apt/lists/*

# ── Google Chrome stable ───────────────────────────────────────
# apt-get update is repeated here because the lists were purged above.
RUN wget -q -O /tmp/chrome.deb \
    https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb \
    && apt-get update \
    && apt-get install -y --no-install-recommends /tmp/chrome.deb \
    && rm -f /tmp/chrome.deb \
    && rm -rf /var/lib/apt/lists/* \
    # Verify Chrome installed
    && google-chrome --version

# ── Working directory ──────────────────────────────────────────
WORKDIR /app

# ── Python deps ────────────────────────────────────────────────
# requirements.txt copied alone first so the dependency layer is
# cached across source-only edits.
COPY requirements.txt .
RUN pip install --no-cache-dir --upgrade pip \
    && pip install --no-cache-dir -r requirements.txt

# ── App source ─────────────────────────────────────────────────
COPY cloudflare_provider.py .
COPY server.py .

# ── Cache directory ────────────────────────────────────────────
# Used by cloudflare_provider.py for the model-catalogue JSON cache.
RUN mkdir -p /app/cache

# ── Non-root user (HuggingFace Spaces requirement) ─────────────
RUN useradd -m -u 1000 appuser \
    && chown -R appuser:appuser /app
USER appuser

# ── Environment ────────────────────────────────────────────────
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    # Enable Xvfb virtual display — REQUIRED (no headless Chrome)
    VR_DISPLAY=1 \
    # Pool: 2 pre-warmed connections
    POOL_SIZE=2 \
    # Port
    PORT=7860 \
    HOST=0.0.0.0 \
    # Health monitor interval (seconds)
    HEALTH_INTERVAL=60 \
    # Default model
    DEFAULT_MODEL=@cf/moonshotai/kimi-k2.5

# ── Expose ─────────────────────────────────────────────────────
EXPOSE 7860

# ── Start server ───────────────────────────────────────────────
CMD ["python", "server.py"]
cloudflare_provider.py ADDED
@@ -0,0 +1,1114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ╔═══════════════════════════════════════════════════════════════╗
3
+ β•‘ cloudflare_provider.py β•‘
4
+ β•‘ Cloudflare AI Playground β€” Reverse Engineered Provider β•‘
5
+ β•‘ β•‘
6
+ β•‘ Connection Strategy: β•‘
7
+ β•‘ 1. Try DIRECT Python WebSocket (no browser needed) β•‘
8
+ β•‘ 2. If blocked β†’ launch browser with Xvfb virtual display β•‘
9
+ β•‘ extract cookies β†’ reconnect with cookies via Python WS β•‘
10
+ β•‘ 3. If still blocked β†’ keep browser as WS relay β•‘
11
+ β•‘ β•‘
12
+ β•‘ Virtual Display: β•‘
13
+ β•‘ Set env var VR_DISPLAY=1 to auto-start Xvfb via β•‘
14
+ β•‘ pyvirtualdisplay (required on headless Linux / HF Spaces). β•‘
15
+ β•‘ β•‘
16
+ β•‘ NOTE: Headless Chrome is intentionally disabled β€” β•‘
17
+ β•‘ Cloudflare blocks headless user agents. β•‘
18
+ β•šβ•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•
19
+ """
20
+
21
+ import atexit
22
+ import json
23
+ import os
24
+ import sys
25
+ import time
26
+ import uuid
27
+ import random
28
+ import string
29
+ import threading
30
+ from pathlib import Path
31
+ from typing import Generator, Optional
32
+
33
+
34
+ # ═══════════════════════════════════════════════════════════
35
+ # Β§1 β€” AUTO INSTALL
36
+ # ═══════════════════════════════════════════════════════════
37
+ def _install(pkg, pip_name=None):
38
+ try:
39
+ __import__(pkg)
40
+ except ImportError:
41
+ import subprocess
42
+ subprocess.check_call(
43
+ [sys.executable, "-m", "pip", "install", "-q", pip_name or pkg]
44
+ )
45
+
46
# Hard dependency: websocket-client (import name "websocket").
_install("websocket", "websocket-client")
import websocket as _ws_mod

# Optional browser fallback — DrissionPage drives Chrome when the
# direct WebSocket connection is blocked by Cloudflare.
_HAS_BROWSER = False
try:
    _install("DrissionPage")
    from DrissionPage import ChromiumPage, ChromiumOptions
    _HAS_BROWSER = True
except Exception:
    # No Chrome/DrissionPage available — provider runs direct-WS only.
    pass
56
+
57
+
58
+ # ═══════════════════════════════════════════════════════════
59
+ # Β§1b β€” VIRTUAL DISPLAY (from OS env)
60
+ # ═══════════════════════════════════════════════════════════
61
+ def _parse_bool_env(key: str, default: bool = False) -> bool:
62
+ val = os.environ.get(key, "").strip().lower()
63
+ if not val:
64
+ return default
65
+ return val in ("1", "true", "yes", "on", "enable", "enabled")
66
+
67
+
68
# Opt-in flag: VR_DISPLAY=1 starts an Xvfb virtual display for Chrome
# (required on headless Linux such as HuggingFace Spaces).
VR_DISPLAY = _parse_bool_env("VR_DISPLAY", default=False)

_HAS_VIRTUAL_DISPLAY = False   # True once pyvirtualdisplay imports cleanly
_Display = None                # pyvirtualdisplay.Display class (if available)

if VR_DISPLAY:
    try:
        _install("pyvirtualdisplay", "PyVirtualDisplay")
        from pyvirtualdisplay import Display as _Display
        _HAS_VIRTUAL_DISPLAY = True
    except Exception as _vd_err:
        # Soft failure: warn loudly but keep the module importable —
        # the browser transport will raise later if it is actually needed.
        print(
            f"[cloudflare] ⚠ VR_DISPLAY=1 but pyvirtualdisplay failed: {_vd_err}\n"
            f"[cloudflare] Make sure Xvfb is installed: sudo apt install xvfb",
            file=sys.stderr, flush=True,
        )
84
+
85
+
86
+ # ═══════════════════════════════════════════════════════════
87
+ # Β§2 β€” CONSTANTS
88
+ # ═══════════════════════════════════════════════════════════
89
_SITE = "https://playground.ai.cloudflare.com"                     # page origin
_WS_BASE = "wss://playground.ai.cloudflare.com/agents/playground"  # WS endpoint root
_CACHE = Path(__file__).resolve().parent / "cache"                 # on-disk cache dir
_MFILE = _CACHE / "cloudflare_models.json"                         # model catalogue file
_CHARS = string.ascii_letters + string.digits                      # mixed-case ID alphabet
_LOWER = string.ascii_lowercase + string.digits                    # lowercase ID alphabet
# Desktop Chrome user agent — presented on the direct WebSocket connect.
_UA = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/146.0.0.0 Safari/537.36"
)

# Model cache TTL — 6 hours
_CACHE_TTL_SECONDS = 6 * 3600
103
+
104
+
105
+ # ═══════════════════════════════════════════════════════════
106
+ # Β§3 β€” MODEL TABLE (short alias β†’ full @cf/@hf ID)
107
+ # ══════════════════════════════════════════════════════════��
108
+ _SHORT_TO_FULL: dict[str, str] = {
109
+ "gpt-oss-120b": "@cf/openai/gpt-oss-120b",
110
+ "gpt-oss-20b": "@cf/openai/gpt-oss-20b",
111
+ "qwen1.5-0.5b-chat": "@cf/qwen/qwen1.5-0.5b-chat",
112
+ "qwen1.5-1.8b-chat": "@cf/qwen/qwen1.5-1.8b-chat",
113
+ "qwen1.5-7b-chat-awq": "@cf/qwen/qwen1.5-7b-chat-awq",
114
+ "qwen1.5-14b-chat-awq": "@cf/qwen/qwen1.5-14b-chat-awq",
115
+ "qwen2.5-coder-32b-instruct": "@cf/qwen/qwen2.5-coder-32b-instruct",
116
+ "qwq-32b": "@cf/qwen/qwq-32b",
117
+ "qwen3-30b-a3b-fp8": "@cf/qwen/qwen3-30b-a3b-fp8",
118
+ "gemma-2b-it-lora": "@cf/google/gemma-2b-it-lora",
119
+ "gemma-7b-it-lora": "@cf/google/gemma-7b-it-lora",
120
+ "gemma-3-12b-it": "@cf/google/gemma-3-12b-it",
121
+ "gemma-7b-it": "@hf/google/gemma-7b-it",
122
+ "starling-lm-7b-beta": "@hf/nexusflow/starling-lm-7b-beta",
123
+ "llama-3-8b-instruct": "@cf/meta/llama-3-8b-instruct",
124
+ "llama-3-8b-instruct-awq": "@cf/meta/llama-3-8b-instruct-awq",
125
+ "llama-3.2-3b-instruct": "@cf/meta/llama-3.2-3b-instruct",
126
+ "llama-3.2-1b-instruct": "@cf/meta/llama-3.2-1b-instruct",
127
+ "llama-3.2-11b-vision-instruct": "@cf/meta/llama-3.2-11b-vision-instruct",
128
+ "llama-3.3-70b-instruct-fp8-fast": "@cf/meta/llama-3.3-70b-instruct-fp8-fast",
129
+ "llama-3.1-8b-instruct-fp8": "@cf/meta/llama-3.1-8b-instruct-fp8",
130
+ "llama-3.1-8b-instruct-awq": "@cf/meta/llama-3.1-8b-instruct-awq",
131
+ "llama-3.1-70b-instruct": "@cf/meta/llama-3.1-70b-instruct",
132
+ "llama-4-scout-17b-16e-instruct": "@cf/meta/llama-4-scout-17b-16e-instruct",
133
+ "llama-2-7b-chat-fp16": "@cf/meta/llama-2-7b-chat-fp16",
134
+ "llama-2-7b-chat-int8": "@cf/meta/llama-2-7b-chat-int8",
135
+ "llama-2-7b-chat-hf-lora": "@cf/meta-llama/llama-2-7b-chat-hf-lora",
136
+ "llama-guard-3-8b": "@cf/meta/llama-guard-3-8b",
137
+ "mistral-7b-instruct-v0.1": "@cf/mistral/mistral-7b-instruct-v0.1",
138
+ "mistral-7b-instruct-v0.2-lora": "@cf/mistral/mistral-7b-instruct-v0.2-lora",
139
+ "mistral-7b-instruct-v0.2": "@hf/mistral/mistral-7b-instruct-v0.2",
140
+ "mistral-7b-instruct-v0.1-awq": "@hf/thebloke/mistral-7b-instruct-v0.1-awq",
141
+ "mistral-small-3.1-24b-instruct": "@cf/mistralai/mistral-small-3.1-24b-instruct",
142
+ "deepseek-r1-distill-qwen-32b": "@cf/deepseek-ai/deepseek-r1-distill-qwen-32b",
143
+ "deepseek-math-7b-instruct": "@cf/deepseek-ai/deepseek-math-7b-instruct",
144
+ "deepseek-coder-6.7b-base-awq": "@hf/thebloke/deepseek-coder-6.7b-base-awq",
145
+ "deepseek-coder-6.7b-instruct-awq":"@hf/thebloke/deepseek-coder-6.7b-instruct-awq",
146
+ "tinyllama-1.1b-chat-v1.0": "@cf/tinyllama/tinyllama-1.1b-chat-v1.0",
147
+ "falcon-7b-instruct": "@cf/tiiuae/falcon-7b-instruct",
148
+ "hermes-2-pro-mistral-7b": "@hf/nousresearch/hermes-2-pro-mistral-7b",
149
+ "neural-chat-7b-v3-1-awq": "@hf/thebloke/neural-chat-7b-v3-1-awq",
150
+ "openhermes-2.5-mistral-7b-awq": "@hf/thebloke/openhermes-2.5-mistral-7b-awq",
151
+ "openchat-3.5-0106": "@cf/openchat/openchat-3.5-0106",
152
+ "llama-2-13b-chat-awq": "@hf/thebloke/llama-2-13b-chat-awq",
153
+ "zephyr-7b-beta-awq": "@hf/thebloke/zephyr-7b-beta-awq",
154
+ "discolm-german-7b-v1-awq": "@cf/thebloke/discolm-german-7b-v1-awq",
155
+ "una-cybertron-7b-v2-bf16": "@cf/fblgit/una-cybertron-7b-v2-bf16",
156
+ "sqlcoder-7b-2": "@cf/defog/sqlcoder-7b-2",
157
+ "phi-2": "@cf/microsoft/phi-2",
158
+ "nemotron-3-120b-a12b": "@cf/nvidia/nemotron-3-120b-a12b",
159
+ "gemma-sea-lion-v4-27b-it": "@cf/aisingapore/gemma-sea-lion-v4-27b-it",
160
+ "glm-4.7-flash": "@cf/zai-org/glm-4.7-flash",
161
+ "granite-4.0-h-micro": "@cf/ibm-granite/granite-4.0-h-micro",
162
+ "kimi-k2.5": "@cf/moonshotai/kimi-k2.5",
163
+ }
164
+
165
+
166
+ # ═══════════════════════════════════════════════════════════
167
+ # Β§4 β€” HELPERS
168
+ # ═══════════════════════════════════════════════════════════
169
def _rid(n=8):
    """Random mixed-case alphanumeric token of length *n*."""
    return "".join(random.choice(_CHARS) for _ in range(n))
171
+
172
def _rid_lower(n=9):
    """Random lowercase-plus-digit token of length *n*."""
    return "".join(random.choice(_LOWER) for _ in range(n))
174
+
175
def _make_sid():
    """Fresh playground session ID: fixed prefix + 21 random chars."""
    return f"Cloudflare-AI-Playground-{_rid(21)}"
177
+
178
+ def _make_pk():
179
+ return str(uuid.uuid4())
180
+
181
def _make_ws_url(sid, pk):
    """WebSocket endpoint URL for session *sid* carrying page key *pk*."""
    return _WS_BASE + "/" + sid + "?_pk=" + pk
183
+
184
def _asst_id():
    """Assistant-message ID: millisecond timestamp + random suffix."""
    millis = int(time.time() * 1000)
    return f"assistant_{millis}_{_rid_lower(9)}"
186
+
187
+ def _resolve_model(name: str) -> str:
188
+ if not name:
189
+ return name
190
+ if name.startswith("@cf/") or name.startswith("@hf/"):
191
+ return name
192
+ return _SHORT_TO_FULL.get(name, name)
193
+
194
+
195
+ # ═══════════════════════════════════════════════════════════
196
+ # Β§5 β€” CONVERTER + BUILDER
197
+ # ═══════════════════════════════════════════════════════════
198
+ class _Conv:
199
+ @staticmethod
200
+ def to_cf(msgs):
201
+ sys_p, out = "", []
202
+ for m in msgs:
203
+ r, c = m.get("role", ""), m.get("content", "")
204
+ if r == "system":
205
+ sys_p = c
206
+ elif r == "user":
207
+ out.append({
208
+ "role": "user",
209
+ "parts": [{"type": "text", "text": c}],
210
+ "id": _rid(16),
211
+ })
212
+ elif r == "assistant":
213
+ out.append({
214
+ "id": _asst_id(),
215
+ "role": "assistant",
216
+ "parts": [
217
+ {"type": "step-start"},
218
+ {"type": "text", "text": c, "state": "done"},
219
+ ],
220
+ })
221
+ return sys_p, out
222
+
223
+ @staticmethod
224
+ def to_openai(cf_msgs, system=""):
225
+ out = []
226
+ if system:
227
+ out.append({"role": "system", "content": system})
228
+ for m in cf_msgs:
229
+ r = m.get("role", "")
230
+ t = next(
231
+ (p.get("text", "") for p in m.get("parts", [])
232
+ if p.get("type") == "text"),
233
+ "",
234
+ )
235
+ if r in ("user", "assistant") and t:
236
+ out.append({"role": r, "content": t})
237
+ return out
238
+
239
+
240
class _Build:
    """Factories for Cloudflare playground message/request payloads."""

    @staticmethod
    def user(text):
        """Wrap *text* as a Cloudflare user message."""
        return {
            "role": "user",
            "parts": [{"type": "text", "text": text}],
            "id": _rid(16),
        }

    @staticmethod
    def asst(text, reason=""):
        """Wrap an assistant reply, optionally with a reasoning part."""
        parts = [{"type": "step-start"}]
        if reason:
            parts.append({"type": "reasoning", "text": reason, "state": "done"})
        parts.append({"type": "text", "text": text, "state": "done"})
        return {"id": _asst_id(), "role": "assistant", "parts": parts}

    @staticmethod
    def req(msgs):
        """Build a cf_agent_use_chat_request envelope around *msgs*."""
        body = json.dumps(
            {"messages": msgs, "trigger": "submit-message"},
            ensure_ascii=False,
        )
        return {
            "id": _rid(8),
            "init": {"method": "POST", "body": body},
            "type": "cf_agent_use_chat_request",
        }
270
+
271
+
272
+ # ═══════════════════════════════════════════════════════════
273
+ # Β§6 β€” MODEL CACHE (with TTL)
274
+ # ═══════════════════════════════════════════════════════════
275
class _Cache:
    """Disk-backed cache for the Cloudflare model catalogue.

    Stored as JSON in _MFILE with an epoch timestamp so that stale
    entries can be rejected by TTL (_CACHE_TTL_SECONDS by default).
    """

    @staticmethod
    def save(models):
        # Persist the model list with both machine- and human-readable
        # timestamps; the directory is created on first use.
        _CACHE.mkdir(parents=True, exist_ok=True)
        _MFILE.write_text(json.dumps({
            "ts": time.time(),  # epoch for TTL checks
            "ts_human": time.strftime("%Y-%m-%d %H:%M:%S"),
            "models": models,
        }, indent=2, ensure_ascii=False))

    @staticmethod
    def load(ttl: int = _CACHE_TTL_SECONDS):
        """Load cache only if it exists and is within TTL."""
        if not _MFILE.exists():
            return None
        try:
            data = json.loads(_MFILE.read_text())
            age = time.time() - data.get("ts", 0)
            if age > ttl:
                return None  # stale — force refresh
            return data.get("models")
        except Exception:
            # Corrupt or unreadable cache file — treat as a miss.
            return None

    @staticmethod
    def clear():
        # Drop the cache file so the next load() forces a refetch.
        if _MFILE.exists():
            _MFILE.unlink()
303
+
304
+
305
+ # ═══════════════════════════════════════════════════════════
306
+ # Β§6b β€” VIRTUAL DISPLAY MANAGER
307
+ # ═══════════════════════════════════════════════════════════
308
class _VirtualDisplayManager:
    """Thread-safe singleton that manages a single Xvfb display.

    All provider instances share one virtual display; it is started
    lazily by the browser transport and stopped via stop().
    """

    _instance = None           # singleton instance
    _lock = threading.Lock()   # guards creation, start() and stop()

    def __init__(self):
        self._display = None   # pyvirtualdisplay.Display while running
        self._running = False
        # Usable only when VR_DISPLAY=1 AND pyvirtualdisplay imported OK.
        self._enabled = VR_DISPLAY and _HAS_VIRTUAL_DISPLAY

    @classmethod
    def instance(cls) -> "_VirtualDisplayManager":
        # Double-checked locking: cheap unlocked read, lock on first use.
        if cls._instance is None:
            with cls._lock:
                if cls._instance is None:
                    cls._instance = cls()
        return cls._instance

    @property
    def enabled(self) -> bool:
        return self._enabled

    @property
    def running(self) -> bool:
        return self._running

    def start(self, width: int = 1920, height: int = 1080, depth: int = 24):
        """Start Xvfb; no-op when disabled or already running.

        On failure the manager disables itself so callers don't retry
        a hopeless configuration forever.
        """
        if not self._enabled:
            return
        if self._running:
            return

        with self._lock:
            if self._running:  # lost the race — another thread started it
                return
            try:
                self._display = _Display(
                    visible=False,
                    size=(width, height),
                    color_depth=depth,
                    backend="xvfb",
                )
                self._display.start()
                self._running = True
                _log_vd(f"✓ Virtual display started "
                        f"({width}x{height}x{depth}) "
                        f"on :{self._display.display}")
            except FileNotFoundError:
                # Xvfb executable missing on this host.
                _log_vd(
                    "✗ Xvfb binary not found! Install: sudo apt install xvfb"
                )
                self._enabled = False
            except Exception as exc:
                _log_vd(f"✗ Failed to start virtual display: {exc}")
                self._enabled = False

    def stop(self):
        """Stop Xvfb and reset state; safe to call when not running."""
        if not self._running:
            return
        with self._lock:
            if not self._running:
                return
            try:
                if self._display:
                    self._display.stop()
                _log_vd("✓ Virtual display stopped")
            except Exception as exc:
                _log_vd(f"⚠ Error stopping virtual display: {exc}")
            finally:
                # Always reset, even if stop() raised.
                self._display = None
                self._running = False

    def __repr__(self):
        state = "running" if self._running else ("idle" if self._enabled else "disabled")
        disp = f" :{self._display.display}" if self._running and self._display else ""
        return f"VirtualDisplay({state}{disp})"
385
+
386
+
387
+ def _log_vd(msg: str):
388
+ print(f"[cloudflare:vdisplay] {msg}", file=sys.stderr, flush=True)
389
+
390
+
391
+ # ═══════════════════════════════════════════════════════════
392
+ # Β§7 β€” TRANSPORT LAYER
393
+ # ═══════════════════════════════════════════════════════════
394
+
395
+ # ── 7a: Direct Python WebSocket ──────────────────────────
396
class _DirectTransport:
    """Pure Python WS via websocket-client with background recv thread.

    A daemon reader thread buffers inbound frames into self._inbox
    (guarded by self._lock); recv() drains that buffer without blocking.
    """

    def __init__(self, debug=False):
        self._ws = None        # websocket.WebSocket once connected
        self._inbox = []       # buffered inbound frames
        self._lock = threading.Lock()
        self._running = False  # reader-thread loop flag
        self._thread = None
        self._debug = debug

    def connect(self, url: str, cookies: str = "") -> bool:
        """Open the socket with a browser-like UA/Origin and start the reader.

        cookies: optional Cookie header value (e.g. harvested from a
        browser session that already passed the Cloudflare challenge).
        Raises on connection failure (websocket-client exceptions).
        """
        self._ws = _ws_mod.WebSocket()
        headers = [f"User-Agent: {_UA}"]
        if cookies:
            headers.append(f"Cookie: {cookies}")

        self._ws.connect(
            url,
            origin=_SITE,
            header=headers,
            timeout=15,
        )

        self._running = True
        self._thread = threading.Thread(target=self._loop, daemon=True)
        self._thread.start()
        return True

    def _loop(self):
        # Reader thread: short poll timeout so close() is noticed quickly.
        self._ws.settimeout(0.05)
        while self._running:
            try:
                data = self._ws.recv()
                if data:
                    with self._lock:
                        self._inbox.append(data)
            except _ws_mod.WebSocketTimeoutException:
                continue  # idle poll — keep looping
            except _ws_mod.WebSocketConnectionClosedException:
                self._running = False
                break
            except Exception:
                # Any other socket error ends the reader.
                if self._running:
                    self._running = False
                break

    def send(self, data: str) -> bool:
        """Send a text frame; returns False (never raises) on failure."""
        try:
            if self._ws and self._ws.connected:
                self._ws.send(data)
                return True
        except Exception:
            pass
        return False

    def recv(self) -> list[str]:
        """Drain and return all buffered inbound frames."""
        with self._lock:
            msgs = self._inbox[:]
            self._inbox.clear()
        return msgs

    @property
    def alive(self) -> bool:
        # True only while the reader loop runs on a connected socket.
        return self._running and self._ws is not None and self._ws.connected

    def close(self):
        """Stop the reader thread and close the socket (idempotent)."""
        self._running = False
        if self._ws:
            try:
                self._ws.close()
            except Exception:
                pass
            self._ws = None
        if self._thread and self._thread.is_alive():
            self._thread.join(timeout=2)
        self._thread = None
473
+
474
+
475
+ # ── 7b: Browser-based WebSocket (fallback) ───────────────
476
+ _BROWSER_JS = """
477
+ (function(){
478
+ if(window.__cfws) return 'exists';
479
+ window.__cfws = {
480
+ sock:null, alive:false, inbox:[], error:null,
481
+ connect:function(u){
482
+ var s=this; s.error=null; s.alive=false; s.inbox=[];
483
+ s.sock=new WebSocket(u);
484
+ s.sock.onopen=function(){s.alive=true;s.error=null};
485
+ s.sock.onmessage=function(e){s.inbox.push(e.data)};
486
+ s.sock.onerror=function(){s.error='ws_error'};
487
+ s.sock.onclose=function(e){
488
+ s.alive=false;
489
+ if(e.code!==1000&&e.code!==1005)s.error='closed_'+e.code
490
+ };
491
+ },
492
+ send:function(d){
493
+ if(this.sock&&this.sock.readyState===1){
494
+ this.sock.send(typeof d==='string'?d:JSON.stringify(d));
495
+ return true}return false},
496
+ drain:function(){
497
+ if(!this.inbox.length)return null;
498
+ var r=JSON.stringify(this.inbox);this.inbox=[];return r},
499
+ kill:function(){if(this.sock)this.sock.close();this.alive=false}
500
+ };
501
+ return 'ok';
502
+ })();
503
+ """
504
+
505
+
506
class _BrowserTransport:
    """
    Headless-FREE Chrome WebSocket relay via Xvfb virtual display.

    NOTE: We intentionally do NOT use Chrome's --headless flag because
    Cloudflare Playground detects and blocks headless user agents.
    Instead we rely on pyvirtualdisplay / Xvfb to provide a real (but
    invisible) X11 display on servers that have no physical monitor.

    Set VR_DISPLAY=1 before importing to enable this behaviour.
    """

    def __init__(self, debug=False):
        self._page = None   # ChromiumPage once connect() succeeds
        self._debug = debug
        self._vd_mgr = _VirtualDisplayManager.instance()

    def connect(self, url: str, **_) -> bool:
        """Open Chrome on the playground page and bridge a WS to *url*.

        Raises RuntimeError when no browser/display is available and
        ConnectionError when the in-page WebSocket cannot be opened.
        """
        if not _HAS_BROWSER:
            raise RuntimeError(
                "DrissionPage not available — cannot use browser fallback"
            )

        # ── Start virtual display (Xvfb) if enabled ──────
        if self._vd_mgr.enabled and not self._vd_mgr.running:
            self._vd_mgr.start()
            if not self._vd_mgr.running:
                raise RuntimeError(
                    "Virtual display (Xvfb) failed to start. "
                    "Install xvfb: apt-get install -y xvfb"
                )

        # FIX: previously this raised whenever Xvfb was not running, even
        # on machines that DO have a real X display — contradicting the
        # error message's own advice. Honour an existing DISPLAY and only
        # fail when neither Xvfb nor a real display is available.
        if not self._vd_mgr.running and not os.environ.get("DISPLAY"):
            raise RuntimeError(
                "No display available and VR_DISPLAY is not set. "
                "Set VR_DISPLAY=1 to use Xvfb virtual display, "
                "or run on a machine with a real display. "
                "Headless Chrome is intentionally disabled."
            )

        opts = ChromiumOptions()
        opts.set_argument("--disable-blink-features=AutomationControlled")
        opts.set_argument("--no-sandbox")
        opts.set_argument("--disable-dev-shm-usage")
        opts.set_argument("--disable-gpu")
        opts.set_argument("--disable-extensions")
        opts.set_argument("--disable-plugins")
        opts.set_argument("--disable-infobars")
        opts.set_argument("--window-size=1280,720")
        # ── NO headless flag — Cloudflare blocks headless ──

        self._page = ChromiumPage(addr_or_opts=opts)
        self._page.get(_SITE)
        time.sleep(4)  # let the page settle / pass any CF challenge

        # Install the in-page WS relay, then open the socket from page JS.
        self._page.run_js(_BROWSER_JS)
        self._page.run_js(f"window.__cfws.connect('{url}');")

        # Poll until the page-side socket reports open, error, or timeout.
        deadline = time.time() + 15
        while time.time() < deadline:
            if self._page.run_js("return window.__cfws.alive;"):
                return True
            err = self._page.run_js("return window.__cfws.error;")
            if err:
                raise ConnectionError(f"Browser WS failed: {err}")
            time.sleep(0.1)

        raise ConnectionError("Browser WS timed out waiting for connection")

    def send(self, data: str) -> bool:
        """Relay *data* through the in-page WebSocket; True on success."""
        try:
            return bool(
                self._page.run_js(
                    f"return window.__cfws.send({json.dumps(data)});"
                )
            )
        except Exception:
            return False

    def recv(self) -> list[str]:
        """Drain frames buffered by the in-page socket (empty list on error)."""
        try:
            raw = self._page.run_js("return window.__cfws.drain();")
        except Exception:
            return []
        if not raw:
            return []
        try:
            batch = json.loads(raw)
            return batch if isinstance(batch, list) else []
        except (json.JSONDecodeError, TypeError):
            return []

    @property
    def alive(self) -> bool:
        # Reflects the page-side socket's open state; False on any JS error.
        try:
            return bool(self._page.run_js("return window.__cfws.alive;"))
        except Exception:
            return False

    def close(self):
        """Tear down the in-page socket and quit Chrome (best-effort)."""
        if self._page:
            try:
                self._page.run_js("if(window.__cfws) window.__cfws.kill();")
            except Exception:
                pass
            try:
                self._page.quit()
            except Exception:
                pass
            self._page = None
616
+
617
+
618
+ # ═══════════════════════════════════════════════════════════
619
+ # Β§8 β€” PROVIDER
620
+ # ═════════════════���═════════════════════════════════════════
621
+ class CloudflareProvider:
622
+ """
623
+ ☁️ Cloudflare AI Playground β€” fully modular provider.
624
+
625
+ Virtual Display (required on headless servers):
626
+ Set VR_DISPLAY=1 before importing this module.
627
+ This starts Xvfb so Chrome has a real (invisible) display.
628
+ Headless Chrome is intentionally NOT used β€” Cloudflare blocks it.
629
+
630
+ $ export VR_DISPLAY=1
631
+ $ python server.py
632
+
633
+ Usage:
634
+ provider = CloudflareProvider()
635
+ for chunk in provider.chat(data="Hello!"):
636
+ print(chunk, end="")
637
+
638
+ # non-streaming:
639
+ response = provider.ask("What is 2+2?")
640
+ """
641
+
642
    def __init__(
        self,
        model: str = "@cf/moonshotai/kimi-k2.5",
        system: str = "You are a helpful assistant.",
        temperature: float = 1.0,
        max_tokens: Optional[int] = None,
        timeout_init: int = 120,
        timeout_idle: int = 30,
        use_cache: bool = True,
        debug: bool = False,
    ):
        """Connect to the playground and prepare the session.

        model:        short alias or full @cf/@hf model ID
        system:       system prompt pushed with the agent state
        temperature:  sampling temperature
        max_tokens:   None -> filled from the model's context window in _boot()
        timeout_init: seconds — presumably the first-response timeout,
                      consumed by streaming code later in this class (not
                      visible here)
        timeout_idle: seconds — presumably the stream-idle timeout (same note)
        use_cache:    reuse the on-disk model catalogue when fresh
        debug:        verbose stderr logging

        NOTE: connects immediately via _boot() — may launch Chrome/Xvfb.
        """
        self.model = _resolve_model(model)
        self.system = system
        self.temperature = temperature
        self.max_tokens = max_tokens
        self.timeout_init = timeout_init
        self.timeout_idle = timeout_idle
        self.use_cache = use_cache
        self.debug = debug

        # Conversation + catalogue state
        self.history: list[dict] = []
        self.models: list[dict] = []
        self._chat_models: list[dict] = []   # "Text Generation" subset
        self.last_response: str = ""
        self.last_reasoning: str = ""

        # Session / transport state
        self._sid: str = ""                  # agent session ID
        self._pk: str = ""                   # page key for the WS URL
        self._transport = None               # _DirectTransport or _BrowserTransport
        self._mode: str = ""                 # "direct" or "browser"
        self._on: bool = False               # True once connected

        self._boot()
        atexit.register(self.close)  # best-effort cleanup at interpreter exit
676
+
677
+ # ─────────────────────────────────────────────────
678
+ # Logging
679
+ # ─────────────────────────────────────────────────
680
+ def _d(self, *a):
681
+ if self.debug:
682
+ print("[cloudflare]", *a, file=sys.stderr, flush=True)
683
+
684
+ # ─────────────────────────────────────────────────
685
+ # Low-level WS
686
+ # ─────────────────────────────────────────────────
687
+ def _pull(self) -> list[str]:
688
+ msgs = self._transport.recv()
689
+ if self.debug:
690
+ for m in msgs:
691
+ self._d("←", str(m)[:160])
692
+ return msgs
693
+
694
+ def _push(self, obj):
695
+ raw = json.dumps(obj, ensure_ascii=False)
696
+ self._d("β†’", raw[:300])
697
+ if not self._transport.send(raw):
698
+ raise RuntimeError("WebSocket send failed")
699
+
700
+ # ─────────────────────────────────────────────────
701
+ # Boot β€” tries direct WS first, then Xvfb browser
702
+ # ─────────────────────────────────────────────────
703
+ def _boot(self):
704
+ self._sid = _make_sid()
705
+ self._pk = _make_pk()
706
+ url = _make_ws_url(self._sid, self._pk)
707
+
708
+ # ── Attempt 1: direct Python WebSocket ──────
709
+ try:
710
+ self._d("Trying direct Python WebSocket...")
711
+ t = _DirectTransport(debug=self.debug)
712
+ t.connect(url)
713
+
714
+ time.sleep(0.3)
715
+ if t.alive:
716
+ self._transport = t
717
+ self._mode = "direct"
718
+ self._d("βœ“ Direct connection β€” no browser needed!")
719
+ else:
720
+ t.close()
721
+ raise ConnectionError("Direct WS not alive after connect")
722
+
723
+ except Exception as e:
724
+ self._d(f"Direct failed: {e}")
725
+ self._d("Falling back to browser transport (Xvfb)...")
726
+
727
+ # ── Attempt 2: Xvfb + Chrome relay ──────
728
+ try:
729
+ t = _BrowserTransport(debug=self.debug)
730
+ t.connect(url)
731
+ self._transport = t
732
+ self._mode = "browser"
733
+ self._d("βœ“ Browser transport connected")
734
+ vd = _VirtualDisplayManager.instance()
735
+ if vd.running:
736
+ self._d(f" └─ {vd}")
737
+ except Exception as e2:
738
+ raise ConnectionError(
739
+ f"All connection methods failed.\n"
740
+ f" Direct: {e}\n"
741
+ f" Browser: {e2}\n"
742
+ f" Tip: ensure VR_DISPLAY=1 and xvfb is installed."
743
+ ) from e2
744
+
745
+ self._on = True
746
+
747
+ # ── Handshake ──────────────────────────────
748
+ want = {"cf_agent_identity", "cf_agent_state", "cf_agent_mcp_servers"}
749
+ seen = set()
750
+ deadline = time.time() + 10
751
+ while time.time() < deadline and seen < want:
752
+ for raw in self._pull():
753
+ try:
754
+ seen.add(json.loads(raw).get("type", ""))
755
+ except Exception:
756
+ pass
757
+ time.sleep(0.05)
758
+
759
+ self._d(f"Handshake received: {seen}")
760
+
761
+ self._push({"type": "cf_agent_stream_resume_request"})
762
+ time.sleep(0.3)
763
+ self._pull()
764
+
765
+ # ── Models + state ─────────────────────────
766
+ self._load_models()
767
+ if self.max_tokens is None:
768
+ self.max_tokens = self._ctx_window(self.model)
769
+ self._sync()
770
+
771
+ # ─────────────────────────────────────────────────
772
+ # Models
773
+ # ─────────────────────────────────────────────────
774
    def _load_models(self):
        """Populate self.models / self._chat_models, preferring the disk cache.

        A cache hit within the TTL skips the live RPC entirely; a fresh
        fetch is written back to the cache for subsequent processes.
        """
        if self.use_cache:
            cached = _Cache.load()
            if cached:
                self.models = cached
                # Only "Text Generation" models are usable for chat.
                self._chat_models = [
                    m for m in self.models
                    if m.get("task", {}).get("name") == "Text Generation"
                ]
                self._d(f"Loaded {len(self._chat_models)} chat models from cache")
                return
        self._fetch_models()
        if self.models and self.use_cache:
            _Cache.save(self.models)
788
+
789
    def _fetch_models(self):
        """Fetch the model catalog over the live WebSocket via a `getModels` RPC.

        Sends one RPC frame tagged with a fresh UUID, then polls the transport
        for up to 15 seconds for the matching successful `done` reply.  On
        success populates ``self.models`` and filters ``self._chat_models``
        down to "Text Generation" entries; on timeout it only logs a warning
        and leaves both attributes untouched.
        """
        rid = str(uuid.uuid4())  # correlates the reply frame with this request
        self._push({"args": [], "id": rid, "method": "getModels", "type": "rpc"})

        deadline = time.time() + 15
        while time.time() < deadline:
            for raw in self._pull():
                try:
                    d = json.loads(raw)
                except Exception:
                    continue  # ignore non-JSON frames
                # Accept only the completed, successful reply to *our* RPC id.
                if (d.get("type") == "rpc" and d.get("id") == rid
                        and d.get("done") and d.get("success")):
                    self.models = d.get("result", [])
                    self._chat_models = [
                        m for m in self.models
                        if m.get("task", {}).get("name") == "Text Generation"
                    ]
                    self._d(f"Fetched {len(self._chat_models)} chat models")
                    return
            time.sleep(0.05)  # gentle poll while waiting
        self._d("Warning: model fetch timed out")
811
+
812
+ def _ctx_window(self, model_name: str) -> int:
813
+ for m in self._chat_models:
814
+ if m.get("name") == model_name:
815
+ for p in m.get("properties", []):
816
+ if p.get("property_id") == "context_window":
817
+ try:
818
+ return int(p["value"])
819
+ except (ValueError, KeyError):
820
+ pass
821
+ return 4096
822
+
823
+ def _resolve(self, name: str) -> str:
824
+ if not name:
825
+ return name
826
+ if name.startswith("@cf/") or name.startswith("@hf/"):
827
+ return name
828
+ for m in self._chat_models:
829
+ full = m.get("name", "")
830
+ short = full.rsplit("/", 1)[-1]
831
+ if short == name or full == name:
832
+ return full
833
+ return _SHORT_TO_FULL.get(name, name)
834
+
835
+ # ─────────────────────────────────────────────────
836
+ # State Sync
837
+ # ─────────────────────────────────────────────────
838
+ def _sync(self):
839
+ self._push({
840
+ "type": "cf_agent_state",
841
+ "state": {
842
+ "model": self.model,
843
+ "temperature": self.temperature,
844
+ "stream": True,
845
+ "system": self.system,
846
+ "useExternalProvider": False,
847
+ "externalProvider": "openai",
848
+ "authMethod": "provider-key",
849
+ },
850
+ })
851
+ time.sleep(0.15)
852
+ self._pull()
853
+
854
+ # ─────────────────────────────────────────────────
855
+ # Setters
856
+ # ─────────────────────────────────────────────────
857
+ def set_model(self, name: str) -> "CloudflareProvider":
858
+ full = self._resolve(name)
859
+ self.model = full
860
+ self.max_tokens = self._ctx_window(full)
861
+ if self._on:
862
+ self._sync()
863
+ return self
864
+
865
    def set_system(self, prompt: str) -> "CloudflareProvider":
        """Replace the system prompt; syncs to the server when connected."""
        self.system = prompt
        if self._on:
            self._sync()
        return self
870
+
871
+ def set_temperature(self, t: float) -> "CloudflareProvider":
872
+ self.temperature = max(0.0, min(2.0, t))
873
+ if self._on:
874
+ self._sync()
875
+ return self
876
+
877
    def set_max_tokens(self, n: int) -> "CloudflareProvider":
        """Override the response token budget (local only; no server sync)."""
        self.max_tokens = n
        return self
880
+
881
    def clear_history(self):
        """Drop all conversation turns (the system prompt is kept separately)."""
        self.history.clear()
883
+
884
    def get_history(self) -> list[dict]:
        """Return the conversation as OpenAI-style role/content messages."""
        return _Conv.to_openai(self.history, self.system)
886
+
887
+ # ─────────────────────────────────────────────────
888
+ # Model listing
889
+ # ─────────────────────────────────────────────────
890
+ def list_models(self) -> list[dict]:
891
+ return [{
892
+ "name": m.get("name", ""),
893
+ "short": m.get("name", "").rsplit("/", 1)[-1],
894
+ "context": self._ctx_window(m.get("name", "")),
895
+ "active": m.get("name") == self.model,
896
+ } for m in self._chat_models]
897
+
898
+ def refresh_models(self):
899
+ _Cache.clear()
900
+ self._fetch_models()
901
+ if self.models and self.use_cache:
902
+ _Cache.save(self.models)
903
+
904
+ # ═══════════════════════════════════════════════════
905
+ # β˜… CHAT (streaming generator)
906
+ # ═══════════════════════════════════════════════════
907
    def chat(
        self,
        data: str = None,
        messages: list[dict] = None,
        model: str = None,
        temperature: float = None,
        system: str = None,
        max_tokens: int = None,
    ) -> Generator[str, None, None]:
        """Send the pending conversation and stream the reply as text chunks.

        Provide exactly one input form:
          * ``data``     — one user turn, appended to ``self.history``;
          * ``messages`` — a full OpenAI-style conversation that REPLACES
            ``self.history`` (a system message inside it overrides
            ``self.system``).

        ``model`` / ``temperature`` / ``system`` / ``max_tokens`` act as
        per-call overrides; any change is synced to the server before the
        request is sent.

        Yields:
            Text chunks.  Reasoning-model "thinking" is wrapped in literal
            ``<think>...</think>`` markers.  On transport death, timeout, or
            a server-side error with no text received, a bracketed diagnostic
            string is yielded instead of raising mid-stream.
        """
        if not self._on:
            raise RuntimeError("Not connected — call new_session()")
        if not messages and not data:
            raise ValueError("Provide 'messages' or 'data'")

        # ── Apply per-call overrides; remember whether a server sync is due ──
        changed = False

        if model:
            full = self._resolve(model)
            if full != self.model:
                self.model = full
                self.max_tokens = self._ctx_window(full)
                changed = True

        if temperature is not None and temperature != self.temperature:
            self.temperature = max(0.0, min(2.0, temperature))  # clamp to [0, 2]
            changed = True

        if system and system != self.system:
            self.system = system
            changed = True

        if max_tokens is not None:
            # Local budget only — not part of the synced agent state.
            self.max_tokens = max_tokens

        if messages:
            # Full conversation supplied: convert and replace local history.
            sys_p, cf_msgs = _Conv.to_cf(messages)
            if sys_p and sys_p != self.system:
                self.system = sys_p
                changed = True
            self.history = cf_msgs
        else:
            self.history.append(_Build.user(data))

        if changed:
            self._sync()

        # Fire the actual chat request frame.
        self._push(_Build.req(self.history))

        # ── Stream state ─────────────────────────────────────────────
        full_text = ""          # accumulated answer text
        reasoning = ""          # accumulated "thinking" text
        error = None            # last server-reported error, if any
        got_first = False       # has any token arrived yet?
        done = False            # server signalled end-of-response
        last_data = time.time() # for idle/init timeout accounting
        reasoning_open = False  # inside an unclosed <think> block?

        while not done:
            if not self._transport.alive:
                self._d("Transport died mid-stream")
                if not full_text:
                    yield "[Connection lost]\n"
                break

            msgs = self._pull()

            if not msgs:
                # No frames: apply the longer "init" timeout until the first
                # token arrives, then the shorter "idle" timeout between tokens.
                elapsed = time.time() - last_data
                limit = self.timeout_idle if got_first else self.timeout_init
                if elapsed > limit:
                    self._d(f"Timeout after {elapsed:.1f}s")
                    if not full_text:
                        yield "[Timeout — no response received]\n"
                    break
                time.sleep(0.015 if got_first else 0.04)  # tighter poll once streaming
                continue

            last_data = time.time()

            for raw in msgs:
                try:
                    f = json.loads(raw)
                except Exception:
                    continue  # non-JSON frame: ignore

                ftype = f.get("type", "")
                if ftype != "cf_agent_use_chat_response":
                    continue  # unrelated agent frame

                # The payload is a JSON string nested inside the frame.
                body_str = f.get("body", "")
                if body_str:
                    try:
                        b = json.loads(body_str)
                    except Exception:
                        continue

                    bt = b.get("type", "")

                    if bt == "reasoning-start":
                        reasoning_open = True
                        got_first = True
                        yield "<think>\n"

                    elif bt == "reasoning-delta":
                        delta = b.get("delta", "")
                        if delta:
                            reasoning += delta
                            got_first = True
                            yield delta

                    elif bt == "reasoning-end":
                        if reasoning_open:
                            reasoning_open = False
                            yield "\n</think>\n\n"

                    elif bt == "text-delta":
                        # Some models start answering without an explicit
                        # reasoning-end — close the think block ourselves.
                        delta = b.get("delta", "")
                        if reasoning_open:
                            reasoning_open = False
                            yield "\n</think>\n\n"
                        if delta:
                            full_text += delta
                            got_first = True
                            yield delta

                    elif bt == "error":
                        error = b.get("message", str(b))

                if f.get("done", False):
                    done = True
                    break

        # Never leave a dangling <think> tag in the emitted stream.
        if reasoning_open:
            yield "\n</think>\n\n"

        if error:
            self._d("Server error:", error)
            if not full_text:
                yield f"\n[Error: {error}]\n"

        # Record the assistant turn only if it produced actual text.
        if full_text:
            self.history.append(_Build.asst(full_text, reasoning))

        self.last_response = full_text
        self.last_reasoning = reasoning
1051
+
1052
+ # ─────────────────────────────────────────────────
1053
+ # ask() β€” non-streaming convenience
1054
+ # ─────────────────────────────────────────────────
1055
+ def ask(self, prompt: str, **kwargs) -> str:
1056
+ return "".join(self.chat(data=prompt, **kwargs))
1057
+
1058
+ # ─────────────────────────────────────────────────
1059
+ # Session management
1060
+ # ─────────────────────────────────────────────────
1061
+ def new_session(self):
1062
+ self._close_transport()
1063
+ self.history.clear()
1064
+ self._boot()
1065
+
1066
+ def _close_transport(self):
1067
+ if self._transport:
1068
+ try:
1069
+ self._transport.close()
1070
+ except Exception:
1071
+ pass
1072
+ self._transport = None
1073
+ self._on = False
1074
+
1075
+ def close(self):
1076
+ self._close_transport()
1077
+ if self._mode == "browser":
1078
+ vd = _VirtualDisplayManager.instance()
1079
+ vd.stop()
1080
+ self._d("Closed.")
1081
+
1082
    def __enter__(self):
        # Context-manager support: `with CloudflareProvider(...) as p: ...`
        return self
1084
+
1085
    def __exit__(self, *_):
        # Always release the transport (and display) on `with`-block exit.
        self.close()
1087
+
1088
    def __del__(self):
        # Last-resort cleanup. Exceptions are swallowed because interpreter
        # shutdown may already have torn down the modules close() relies on.
        try:
            self.close()
        except Exception:
            pass
1093
+
1094
+ def __repr__(self):
1095
+ s = "βœ…" if self._on else "❌"
1096
+ vd = _VirtualDisplayManager.instance()
1097
+ vd_info = f" vdisplay={vd}" if vd.enabled else ""
1098
+ return (
1099
+ f"CloudflareProvider({s} mode={self._mode!r} "
1100
+ f"model={self.model!r} max_tokens={self.max_tokens}{vd_info})"
1101
+ )
1102
+
1103
+
1104
+ # ═══════════════════════════════════════════════════════════
1105
+ # Β§9 β€” PROCESS-EXIT CLEANUP
1106
+ # ═══════════════════════════════════════════════════════════
1107
def _cleanup_virtual_display():
    """atexit hook: stop the shared Xvfb display so no orphan X server leaks."""
    try:
        vd = _VirtualDisplayManager.instance()
        vd.stop()
    except Exception:
        # Interpreter teardown may already have broken imports; stay silent.
        pass

# Registered unconditionally: stop() is expected to be a no-op when no
# virtual display was ever started.
atexit.register(_cleanup_virtual_display)
requirements.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ─── Web framework ──────────────────────────────
2
+ fastapi>=0.111.0
3
+ uvicorn[standard]>=0.29.0
4
+ pydantic>=2.0.0
5
+
6
+ # ─── Cloudflare provider deps ───────────────────
7
+ websocket-client>=1.8.0
8
+ DrissionPage>=4.1.0
9
+
10
+ # ─── Virtual display (Xvfb wrapper) ─────────────
11
+ PyVirtualDisplay>=3.0
server.py ADDED
@@ -0,0 +1,634 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ╔═══════════════════════════════════════════════════════════════╗
3
+ β•‘ server.py β€” Cloudflare AI REST API β•‘
4
+ β•‘ β•‘
5
+ β•‘ OpenAI-compatible endpoints: β•‘
6
+ β•‘ POST /v1/chat/completions (streaming + non-streaming) β•‘
7
+ β•‘ GET /v1/models β•‘
8
+ β•‘ GET /health β•‘
9
+ β•‘ GET / β•‘
10
+ β•‘ β•‘
11
+ β•‘ Architecture: β•‘
12
+ β•‘ β€’ ProviderPool β€” N pre-warmed WS connections β•‘
13
+ β•‘ β€’ acquire() β€” queue-based fair checkout, auto-heal β•‘
14
+ β•‘ β€’ HealthMonitor β€” periodic background probe + heal β•‘
15
+ ║ ‒ SSE streaming — thread→asyncio bridge via Queue ║
16
+ β•šβ•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•
17
+ """
18
+
19
+ import asyncio
20
+ import json
21
+ import logging
22
+ import os
23
+ import sys
24
+ import threading
25
+ import time
26
+ import uuid
27
+ from contextlib import asynccontextmanager
28
+ from typing import AsyncGenerator, List, Optional
29
+
30
+ import uvicorn
31
+ from fastapi import FastAPI, HTTPException, Request
32
+ from fastapi.middleware.cors import CORSMiddleware
33
+ from fastapi.responses import JSONResponse, StreamingResponse
34
+ from pydantic import BaseModel, Field
35
+
36
+ # ─── Import provider ────────────────────────────────────────
37
+ sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
38
+ from cloudflare_provider import CloudflareProvider
39
+
40
+ # ═══════════════════════════════════════════════════════════
41
+ # LOGGING
42
+ # ═══════════════════════════════════════════════════════════
43
# Log to stdout so the container runtime captures everything.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(levelname)-8s %(message)s",
    stream=sys.stdout,
    datefmt="%H:%M:%S",
)
log = logging.getLogger("cf-api")

# ═══════════════════════════════════════════════════════════
# CONFIG (all tunable via environment variables)
# ═══════════════════════════════════════════════════════════
POOL_SIZE = int(os.getenv("POOL_SIZE", "2"))               # pre-warmed provider slots
PORT = int(os.getenv("PORT", "7860"))                      # 7860 = HF Spaces default
HOST = os.getenv("HOST", "0.0.0.0")
HEALTH_INTERVAL = int(os.getenv("HEALTH_INTERVAL", "60"))  # seconds between pool probes
ACQUIRE_TIMEOUT = int(os.getenv("ACQUIRE_TIMEOUT", "60"))  # max wait for a free slot
STREAM_TIMEOUT = int(os.getenv("STREAM_TIMEOUT", "120"))   # total per-request stream timeout
DEFAULT_MODEL = os.getenv("DEFAULT_MODEL", "@cf/moonshotai/kimi-k2.5")
DEFAULT_SYSTEM = os.getenv("DEFAULT_SYSTEM", "You are a helpful assistant.")
+
63
+
64
+ # ═══════════════════════════════════════════════════════════
65
+ # PYDANTIC SCHEMAS
66
+ # ═══════════════════════════════════════════════════════════
67
+ class Message(BaseModel):
68
+ role: str
69
+ content: str
70
+
71
+ class ChatRequest(BaseModel):
72
+ model: str = DEFAULT_MODEL
73
+ messages: List[Message]
74
+ temperature: float = Field(default=1.0, ge=0.0, le=2.0)
75
+ max_tokens: Optional[int] = None
76
+ stream: bool = True
77
+ system: Optional[str] = None # extra system-prompt override
78
+
79
+ class CompletionChoice(BaseModel):
80
+ index: int
81
+ message: dict
82
+ finish_reason: str
83
+
84
+ class CompletionResponse(BaseModel):
85
+ id: str
86
+ object: str
87
+ created: int
88
+ model: str
89
+ choices: List[CompletionChoice]
90
+ usage: dict
91
+
92
+
93
+ # ═══════════════════════════════════════════════════════════
94
+ # MANAGED PROVIDER (pool slot)
95
+ # ═══════════════════════════════════════════════════════════
96
+ class ManagedProvider:
97
+ """A single pool slot wrapping one CloudflareProvider instance."""
98
+
99
+ def __init__(self, slot_id: int):
100
+ self.slot_id = slot_id
101
+ self.provider: Optional[CloudflareProvider] = None
102
+ self.busy = False
103
+ self.born_at = 0.0
104
+ self.error_count = 0
105
+ self.request_count = 0
106
+
107
+ # ── Health ──────────────────────────────────────
108
+ def is_healthy(self) -> bool:
109
+ if self.provider is None:
110
+ return False
111
+ try:
112
+ return (
113
+ self.provider._on
114
+ and self.provider._transport is not None
115
+ and self.provider._transport.alive
116
+ )
117
+ except Exception:
118
+ return False
119
+
120
+ # ── Teardown ────────────────────────────────────
121
+ def close(self):
122
+ p = self.provider
123
+ self.provider = None
124
+ if p:
125
+ try:
126
+ p.close()
127
+ except Exception:
128
+ pass
129
+
130
+ def __repr__(self):
131
+ state = "busy" if self.busy else ("ok" if self.is_healthy() else "dead")
132
+ mode = self.provider._mode if self.provider else "none"
133
+ return (
134
+ f"<Slot#{self.slot_id} {state} mode={mode!r} "
135
+ f"reqs={self.request_count} errs={self.error_count}>"
136
+ )
137
+
138
+
139
+ # ═══════════════════════════════════════════════════════════
140
+ # PROVIDER POOL
141
+ # ═══════════════════════════════════════════════════════════
142
+ class ProviderPool:
143
+ """
144
+ Pre-warmed pool of CloudflareProvider connections.
145
+
146
+ β€’ initialize() β€” create all slots at startup
147
+ β€’ acquire() β€” async context manager; blocks until a free slot
148
+ β€’ health_monitor β€” background coroutine; heals broken idle slots
149
+ β€’ shutdown() β€” clean teardown
150
+ """
151
+
152
+ def __init__(self, size: int = 2):
153
+ self.size = size
154
+ self._slots: List[ManagedProvider] = []
155
+ self._queue: asyncio.Queue = None # set in initialize()
156
+ self._loop: asyncio.AbstractEventLoop = None
157
+ self._lock = asyncio.Lock()
158
+
159
+ # ─── Startup ──────────────────────────────────
160
+ async def initialize(self):
161
+ self._loop = asyncio.get_event_loop()
162
+ self._queue = asyncio.Queue(maxsize=self.size)
163
+
164
+ log.info(f"πŸš€ Initializing provider pool (slots={self.size})")
165
+
166
+ results = await asyncio.gather(
167
+ *[self._spawn_slot(i) for i in range(self.size)],
168
+ return_exceptions=True,
169
+ )
170
+
171
+ ok = sum(1 for r in results if not isinstance(r, Exception))
172
+ log.info(f" Pool ready β€” {ok}/{self.size} slots healthy")
173
+
174
+ if ok == 0:
175
+ raise RuntimeError(
176
+ "No provider slots could connect. Check network / Xvfb setup."
177
+ )
178
+
179
+ async def _spawn_slot(self, slot_id: int) -> ManagedProvider:
180
+ managed = ManagedProvider(slot_id)
181
+
182
+ def _create() -> CloudflareProvider:
183
+ log.info(f" [S{slot_id}] Connecting...")
184
+ return CloudflareProvider(
185
+ model = DEFAULT_MODEL,
186
+ system = DEFAULT_SYSTEM,
187
+ debug = False,
188
+ use_cache = True,
189
+ )
190
+
191
+ managed.provider = await asyncio.wait_for(
192
+ self._loop.run_in_executor(None, _create),
193
+ timeout=180,
194
+ )
195
+ managed.born_at = time.time()
196
+
197
+ self._slots.append(managed)
198
+ await self._queue.put(managed)
199
+
200
+ mode = managed.provider._mode
201
+ log.info(f" [S{slot_id}] βœ“ Ready mode={mode!r}")
202
+ return managed
203
+
204
+ # ─── Acquire ──────────────────────────────────
205
+ @asynccontextmanager
206
+ async def acquire(self):
207
+ """Checkout a provider, yield it, return on exit (healing if needed)."""
208
+ managed: ManagedProvider = await asyncio.wait_for(
209
+ self._queue.get(),
210
+ timeout=ACQUIRE_TIMEOUT,
211
+ )
212
+ managed.busy = True
213
+ ok = True
214
+
215
+ try:
216
+ # Heal before handing out
217
+ if not managed.is_healthy():
218
+ log.warning(f"[S{managed.slot_id}] Unhealthy β€” healing before use")
219
+ await self._heal(managed)
220
+
221
+ managed.request_count += 1
222
+ yield managed.provider
223
+
224
+ except Exception:
225
+ managed.error_count += 1
226
+ ok = False
227
+ raise
228
+
229
+ finally:
230
+ managed.busy = False
231
+ # After use: return if healthy, else heal in background
232
+ if managed.is_healthy():
233
+ await self._queue.put(managed)
234
+ else:
235
+ log.warning(f"[S{managed.slot_id}] Unhealthy after use β€” background heal")
236
+ asyncio.create_task(self._heal_then_return(managed))
237
+
238
+ # ─── Healing ──────────────────────────────────
239
+ async def _heal(self, managed: ManagedProvider):
240
+ sid = managed.slot_id
241
+
242
+ def _recreate() -> CloudflareProvider:
243
+ managed.close()
244
+ return CloudflareProvider(
245
+ model = DEFAULT_MODEL,
246
+ system = DEFAULT_SYSTEM,
247
+ debug = False,
248
+ use_cache = True,
249
+ )
250
+
251
+ managed.provider = await asyncio.wait_for(
252
+ self._loop.run_in_executor(None, _recreate),
253
+ timeout=180,
254
+ )
255
+ managed.born_at = time.time()
256
+ managed.error_count = 0
257
+ log.info(f"[S{sid}] βœ“ Healed mode={managed.provider._mode!r}")
258
+
259
+ async def _heal_then_return(self, managed: ManagedProvider):
260
+ try:
261
+ await self._heal(managed)
262
+ except Exception as e:
263
+ log.error(f"[S{managed.slot_id}] Heal failed: {e}")
264
+ # Try a brand-new slot as last resort
265
+ try:
266
+ managed.close()
267
+ managed.provider = await asyncio.wait_for(
268
+ self._loop.run_in_executor(
269
+ None,
270
+ lambda: CloudflareProvider(
271
+ model=DEFAULT_MODEL, system=DEFAULT_SYSTEM,
272
+ debug=False, use_cache=True,
273
+ ),
274
+ ),
275
+ timeout=180,
276
+ )
277
+ managed.born_at = time.time()
278
+ managed.error_count = 0
279
+ log.info(f"[S{managed.slot_id}] βœ“ Cold-boot recovery succeeded")
280
+ except Exception as e2:
281
+ log.error(f"[S{managed.slot_id}] Cold-boot also failed: {e2}")
282
+
283
+ await self._queue.put(managed)
284
+
285
+ # ─── Health monitor ───────────────────────────
286
+ async def health_monitor(self):
287
+ """Periodic background coroutine β€” checks and heals idle slots."""
288
+ while True:
289
+ await asyncio.sleep(HEALTH_INTERVAL)
290
+ healthy = sum(1 for m in self._slots if m.is_healthy())
291
+ busy = sum(1 for m in self._slots if m.busy)
292
+ log.info(
293
+ f"β™₯ Health check β€” {healthy}/{self.size} healthy, "
294
+ f"{busy} busy, queue={self._queue.qsize()}"
295
+ )
296
+
297
+ for managed in list(self._slots):
298
+ if not managed.busy and not managed.is_healthy():
299
+ log.warning(f"[S{managed.slot_id}] Idle but unhealthy β€” healing")
300
+ # Pull from queue if it's still there, otherwise skip
301
+ asyncio.create_task(self._heal_then_return(managed))
302
+
303
+ # ─── Status ───────────────────────────────────
304
+ @property
305
+ def status(self) -> dict:
306
+ return {
307
+ "pool_size": self.size,
308
+ "queue_free": self._queue.qsize() if self._queue else 0,
309
+ "slots": [
310
+ {
311
+ "id": m.slot_id,
312
+ "healthy": m.is_healthy(),
313
+ "busy": m.busy,
314
+ "mode": m.provider._mode if m.provider else "none",
315
+ "errors": m.error_count,
316
+ "requests": m.request_count,
317
+ "age_s": round(time.time() - m.born_at, 1) if m.born_at else 0,
318
+ }
319
+ for m in self._slots
320
+ ],
321
+ }
322
+
323
+ # ─── Shutdown ─────────────────────────────────
324
+ async def shutdown(self):
325
+ log.info("Shutting down provider pool...")
326
+ for m in self._slots:
327
+ m.close()
328
+ log.info("Pool shut down.")
329
+
330
+
331
+ # ═══════════════════════════════════════════════════════════
332
+ # GLOBAL POOL REFERENCE
333
+ # ═══════════════════════════════════════════════════════════
334
+ pool: ProviderPool = None
335
+
336
+
337
+ # ═══════════════════════════════════════════════════════════
338
+ # LIFESPAN (startup / shutdown)
339
+ # ═══════════════════════════════════════════════════════════
340
+ @asynccontextmanager
341
+ async def lifespan(app: FastAPI):
342
+ global pool
343
+ pool = ProviderPool(size=POOL_SIZE)
344
+ await pool.initialize()
345
+
346
+ monitor = asyncio.create_task(pool.health_monitor())
347
+ log.info(f"βœ… Server ready on {HOST}:{PORT}")
348
+
349
+ yield
350
+
351
+ monitor.cancel()
352
+ try:
353
+ await monitor
354
+ except asyncio.CancelledError:
355
+ pass
356
+ await pool.shutdown()
357
+
358
+
359
+ # ═══════════════════════════════════════════════════════════
360
+ # FASTAPI APP
361
+ # ═══════════════════════════════════════════════════════════
362
app = FastAPI(
    title = "Cloudflare AI API",
    description = "OpenAI-compatible streaming API via Cloudflare AI Playground",
    version = "1.0.0",
    lifespan = lifespan,
    docs_url = "/docs",
    redoc_url = "/redoc",
)

# Wide-open CORS: any origin, method, and header.
# NOTE(review): acceptable for a public demo; tighten allow_origins before
# fronting anything authenticated.
app.add_middleware(
    CORSMiddleware,
    allow_origins = ["*"],
    allow_methods = ["*"],
    allow_headers = ["*"],
)
+
378
+
379
+ # ═══════════════════════════════════════════════════════════
380
+ # SSE STREAMING HELPERS
381
+ # ═══════════════════════════════════════════════════════════
382
+ def _sse_chunk(content: str, model: str, chunk_id: str) -> str:
383
+ """Format one SSE data line in OpenAI chunk format."""
384
+ payload = {
385
+ "id": chunk_id,
386
+ "object": "chat.completion.chunk",
387
+ "created": int(time.time()),
388
+ "model": model,
389
+ "choices": [{
390
+ "index": 0,
391
+ "delta": {"content": content},
392
+ "finish_reason": None,
393
+ }],
394
+ }
395
+ return f"data: {json.dumps(payload, ensure_ascii=False)}\n\n"
396
+
397
+
398
+ def _sse_done(model: str, chunk_id: str) -> str:
399
+ """Final SSE chunk with finish_reason=stop."""
400
+ payload = {
401
+ "id": chunk_id,
402
+ "object": "chat.completion.chunk",
403
+ "created": int(time.time()),
404
+ "model": model,
405
+ "choices": [{
406
+ "index": 0,
407
+ "delta": {},
408
+ "finish_reason": "stop",
409
+ }],
410
+ }
411
+ return f"data: {json.dumps(payload)}\n\ndata: [DONE]\n\n"
412
+
413
+
414
+ def _sse_error(msg: str) -> str:
415
+ return f"data: {{\"error\": {json.dumps(msg)}}}\n\ndata: [DONE]\n\n"
416
+
417
+
418
+ async def _stream_generator(
419
+ provider: CloudflareProvider,
420
+ req: ChatRequest,
421
+ ) -> AsyncGenerator[str, None]:
422
+ """
423
+ Bridge between the synchronous provider.chat() generator and
424
+ FastAPI's async StreamingResponse.
425
+
426
+ Strategy:
427
+ 1. Spin up a background thread that runs provider.chat() and
428
+ pushes each chunk into an asyncio.Queue.
429
+ 2. Yield SSE-formatted chunks from the queue in the async loop.
430
+ """
431
+ loop = asyncio.get_event_loop()
432
+ q: asyncio.Queue = asyncio.Queue(maxsize=256)
433
+ chunk_id = f"chatcmpl-{uuid.uuid4().hex[:20]}"
434
+ cancel = threading.Event()
435
+
436
+ # Build kwargs for provider
437
+ messages = [{"role": m.role, "content": m.content} for m in req.messages]
438
+ kwargs: dict = {
439
+ "messages": messages,
440
+ "temperature": req.temperature,
441
+ }
442
+ if req.model:
443
+ kwargs["model"] = req.model
444
+ if req.max_tokens:
445
+ kwargs["max_tokens"] = req.max_tokens
446
+ if req.system:
447
+ kwargs["system"] = req.system
448
+
449
+ # ── Worker thread ────────────────────────────
450
+ def _worker():
451
+ try:
452
+ for chunk in provider.chat(**kwargs):
453
+ if cancel.is_set():
454
+ break
455
+ fut = asyncio.run_coroutine_threadsafe(q.put(chunk), loop)
456
+ fut.result(timeout=10) # backpressure: block thread if queue full
457
+ except Exception as exc:
458
+ err = RuntimeError(f"Stream error: {exc}")
459
+ asyncio.run_coroutine_threadsafe(q.put(err), loop).result(timeout=5)
460
+ finally:
461
+ asyncio.run_coroutine_threadsafe(q.put(None), loop).result(timeout=5)
462
+
463
+ t = threading.Thread(target=_worker, daemon=True)
464
+ t.start()
465
+
466
+ # ── Async consumer ────────────────────────────
467
+ try:
468
+ while True:
469
+ item = await asyncio.wait_for(q.get(), timeout=STREAM_TIMEOUT)
470
+
471
+ if item is None: # sentinel β€” stream done
472
+ yield _sse_done(req.model, chunk_id)
473
+ break
474
+
475
+ if isinstance(item, Exception): # error from worker
476
+ yield _sse_error(str(item))
477
+ break
478
+
479
+ if item: # normal text chunk
480
+ yield _sse_chunk(item, req.model, chunk_id)
481
+
482
+ except asyncio.TimeoutError:
483
+ cancel.set()
484
+ yield _sse_error("Stream timed out β€” no data received")
485
+
486
+ finally:
487
+ cancel.set()
488
+ t.join(timeout=5)
489
+
490
+
491
+ # ═══════════════════════════════════════════════════════════
492
+ # ENDPOINTS
493
+ # ═══════════════════════════════════════════════════════════
494
+
495
+ @app.get("/", tags=["Info"])
496
+ async def root():
497
+ return {
498
+ "service": "Cloudflare AI API",
499
+ "version": "1.0.0",
500
+ "status": "running",
501
+ "endpoints": {
502
+ "chat": "POST /v1/chat/completions",
503
+ "models": "GET /v1/models",
504
+ "health": "GET /health",
505
+ "docs": "GET /docs",
506
+ },
507
+ }
508
+
509
+
510
+ @app.get("/health", tags=["Info"])
511
+ async def health():
512
+ if pool is None:
513
+ raise HTTPException(503, detail="Pool not yet initialized")
514
+ healthy = sum(1 for m in pool._slots if m.is_healthy())
515
+ status = "ok" if healthy > 0 else "degraded"
516
+ return JSONResponse(
517
+ content={"status": status, "pool": pool.status},
518
+ status_code=200 if status == "ok" else 206,
519
+ )
520
+
521
+
522
+ @app.get("/v1/models", tags=["Models"])
523
+ async def list_models():
524
+ if pool is None:
525
+ raise HTTPException(503, detail="Pool not initialized")
526
+
527
+ async with pool.acquire() as provider:
528
+ models = await asyncio.get_event_loop().run_in_executor(
529
+ None, provider.list_models
530
+ )
531
+
532
+ return {
533
+ "object": "list",
534
+ "data": [
535
+ {
536
+ "id": m["name"],
537
+ "object": "model",
538
+ "created": 0,
539
+ "owned_by": "cloudflare",
540
+ "context_window": m.get("context", 4096),
541
+ }
542
+ for m in models
543
+ ],
544
+ }
545
+
546
+
547
+ @app.post("/v1/chat/completions", tags=["Chat"])
548
+ async def chat_completions(req: ChatRequest, request: Request):
549
+ if pool is None:
550
+ raise HTTPException(503, detail="Pool not initialized")
551
+
552
+ if not req.messages:
553
+ raise HTTPException(400, detail="`messages` must not be empty")
554
+
555
+ # ── Streaming ──────────────────────────────────────────
556
+ if req.stream:
557
+ async def _gen():
558
+ async with pool.acquire() as provider:
559
+ async for chunk in _stream_generator(provider, req):
560
+ # Check if client disconnected
561
+ if await request.is_disconnected():
562
+ break
563
+ yield chunk
564
+
565
+ return StreamingResponse(
566
+ _gen(),
567
+ media_type = "text/event-stream",
568
+ headers = {
569
+ "Cache-Control": "no-cache",
570
+ "X-Accel-Buffering": "no",
571
+ "Connection": "keep-alive",
572
+ },
573
+ )
574
+
575
+ # ── Non-streaming ──────────────────────────────────────
576
+ messages = [{"role": m.role, "content": m.content} for m in req.messages]
577
+ kwargs: dict = {
578
+ "messages": messages,
579
+ "temperature": req.temperature,
580
+ }
581
+ if req.model:
582
+ kwargs["model"] = req.model
583
+ if req.max_tokens:
584
+ kwargs["max_tokens"] = req.max_tokens
585
+ if req.system:
586
+ kwargs["system"] = req.system
587
+
588
+ loop = asyncio.get_event_loop()
589
+
590
+ async with pool.acquire() as provider:
591
+ full_parts: list[str] = []
592
+
593
+ def _collect():
594
+ for chunk in provider.chat(**kwargs):
595
+ full_parts.append(chunk)
596
+
597
+ await asyncio.wait_for(
598
+ loop.run_in_executor(None, _collect),
599
+ timeout=STREAM_TIMEOUT,
600
+ )
601
+
602
+ response_text = "".join(full_parts)
603
+
604
+ return {
605
+ "id": f"chatcmpl-{uuid.uuid4().hex[:20]}",
606
+ "object": "chat.completion",
607
+ "created": int(time.time()),
608
+ "model": req.model,
609
+ "choices": [{
610
+ "index": 0,
611
+ "message": {"role": "assistant", "content": response_text},
612
+ "finish_reason": "stop",
613
+ }],
614
+ "usage": {
615
+ "prompt_tokens": 0,
616
+ "completion_tokens": 0,
617
+ "total_tokens": 0,
618
+ },
619
+ }
620
+
621
+
622
+ # ═══════════════════════════════════════════════════════════
623
+ # ENTRY POINT
624
+ # ═══════════════════════════════════════════════════════════
625
+ if __name__ == "__main__":
626
+ uvicorn.run(
627
+ "server:app",
628
+ host = HOST,
629
+ port = PORT,
630
+ log_level = "info",
631
+ workers = 1, # single worker β€” state is in-process
632
+ loop = "asyncio",
633
+ timeout_keep_alive = 30,
634
+ )