Adarshu07 commited on
Commit
f9c817e
Β·
verified Β·
1 Parent(s): 6c2047b

Upload 4 files

Browse files
Files changed (4) hide show
  1. Dockerfile +102 -0
  2. cloudflare_provider.py +1114 -0
  3. requirements.txt +11 -0
  4. server.py +634 -0
Dockerfile ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# ╔══════════════════════════════════════════════════════════════╗
# ║ Dockerfile — Cloudflare AI API                                ║
# ║                                                               ║
# ║ Stack: Python 3.11 · FastAPI · Chrome · Xvfb                  ║
# ║ Port: 7860 (HuggingFace Spaces default)                       ║
# ║                                                               ║
# ║ NOTE: We use Xvfb (virtual framebuffer) instead of            ║
# ║ Chrome --headless because Cloudflare blocks                   ║
# ║ headless user agents at the WebSocket level.                  ║
# ╚══════════════════════════════════════════════════════════════╝

FROM python:3.11-slim

# ── System deps & Xvfb ────────────────────────────────────────
RUN apt-get update && apt-get install -y --no-install-recommends \
    # Xvfb virtual framebuffer
    xvfb \
    # Chrome runtime deps
    wget \
    gnupg \
    ca-certificates \
    libx11-6 \
    libx11-xcb1 \
    libxcb1 \
    libxcomposite1 \
    libxcursor1 \
    libxdamage1 \
    libxext6 \
    libxfixes3 \
    libxi6 \
    libxrandr2 \
    libxrender1 \
    libxss1 \
    libxtst6 \
    libglib2.0-0 \
    libgtk-3-0 \
    libnspr4 \
    libnss3 \
    libatk1.0-0 \
    libatk-bridge2.0-0 \
    libdrm2 \
    libgbm1 \
    libcups2 \
    libasound2 \
    libpango-1.0-0 \
    libpangocairo-1.0-0 \
    fonts-liberation \
    libappindicator3-1 \
    xdg-utils \
    lsb-release \
    && rm -rf /var/lib/apt/lists/*

# ── Google Chrome stable ───────────────────────────────────────
# apt-get update is repeated here because the lists were purged above.
RUN wget -q -O /tmp/chrome.deb \
    https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb \
    && apt-get update \
    && apt-get install -y --no-install-recommends /tmp/chrome.deb \
    && rm -f /tmp/chrome.deb \
    && rm -rf /var/lib/apt/lists/* \
    # Verify Chrome installed
    && google-chrome --version

# ── Working directory ──────────────────────────────────────────
WORKDIR /app

# ── Python deps ────────────────────────────────────────────────
# requirements.txt copied alone first so the dependency layer is
# cached across source-only edits.
COPY requirements.txt .
RUN pip install --no-cache-dir --upgrade pip \
    && pip install --no-cache-dir -r requirements.txt

# ── App source ─────────────────────────────────────────────────
COPY cloudflare_provider.py .
COPY server.py .

# ── Cache directory ────────────────────────────────────────────
# Used by cloudflare_provider.py for the model-catalogue JSON cache.
RUN mkdir -p /app/cache

# ── Non-root user (HuggingFace Spaces requirement) ─────────────
RUN useradd -m -u 1000 appuser \
    && chown -R appuser:appuser /app
USER appuser

# ── Environment ────────────────────────────────────────────────
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    # Enable Xvfb virtual display — REQUIRED (no headless Chrome)
    VR_DISPLAY=1 \
    # Pool: 2 pre-warmed connections
    POOL_SIZE=2 \
    # Port
    PORT=7860 \
    HOST=0.0.0.0 \
    # Health monitor interval (seconds)
    HEALTH_INTERVAL=60 \
    # Default model
    DEFAULT_MODEL=@cf/moonshotai/kimi-k2.5

# ── Expose ─────────────────────────────────────────────────────
EXPOSE 7860

# ── Start server ───────────────────────────────────────────────
CMD ["python", "server.py"]
cloudflare_provider.py ADDED
@@ -0,0 +1,1114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ╔═══════════════════════════════════════════════════════════════╗
3
+ β•‘ cloudflare_provider.py β•‘
4
+ β•‘ Cloudflare AI Playground β€” Reverse Engineered Provider β•‘
5
+ β•‘ β•‘
6
+ β•‘ Connection Strategy: β•‘
7
+ β•‘ 1. Try DIRECT Python WebSocket (no browser needed) β•‘
8
+ β•‘ 2. If blocked β†’ launch browser with Xvfb virtual display β•‘
9
+ β•‘ extract cookies β†’ reconnect with cookies via Python WS β•‘
10
+ β•‘ 3. If still blocked β†’ keep browser as WS relay β•‘
11
+ β•‘ β•‘
12
+ β•‘ Virtual Display: β•‘
13
+ β•‘ Set env var VR_DISPLAY=1 to auto-start Xvfb via β•‘
14
+ β•‘ pyvirtualdisplay (required on headless Linux / HF Spaces). β•‘
15
+ β•‘ β•‘
16
+ β•‘ NOTE: Headless Chrome is intentionally disabled β€” β•‘
17
+ β•‘ Cloudflare blocks headless user agents. β•‘
18
+ β•šβ•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•
19
+ """
20
+
21
+ import atexit
22
+ import json
23
+ import os
24
+ import sys
25
+ import time
26
+ import uuid
27
+ import random
28
+ import string
29
+ import threading
30
+ from pathlib import Path
31
+ from typing import Generator, Optional
32
+
33
+
34
+ # ═══════════════════════════════════════════════════════════
35
+ # Β§1 β€” AUTO INSTALL
36
+ # ═══════════════════════════════════════════════════════════
37
+ def _install(pkg, pip_name=None):
38
+ try:
39
+ __import__(pkg)
40
+ except ImportError:
41
+ import subprocess
42
+ subprocess.check_call(
43
+ [sys.executable, "-m", "pip", "install", "-q", pip_name or pkg]
44
+ )
45
+
46
# Hard dependency: websocket-client (import name "websocket").
_install("websocket", "websocket-client")
import websocket as _ws_mod

# Optional browser fallback — DrissionPage drives Chrome when the
# direct WebSocket connection is blocked by Cloudflare.
_HAS_BROWSER = False
try:
    _install("DrissionPage")
    from DrissionPage import ChromiumPage, ChromiumOptions
    _HAS_BROWSER = True
except Exception:
    # No Chrome/DrissionPage available — provider runs direct-WS only.
    pass
56
+
57
+
58
+ # ═══════════════════════════════════════════════════════════
59
+ # Β§1b β€” VIRTUAL DISPLAY (from OS env)
60
+ # ═══════════════════════════════════════════════════════════
61
+ def _parse_bool_env(key: str, default: bool = False) -> bool:
62
+ val = os.environ.get(key, "").strip().lower()
63
+ if not val:
64
+ return default
65
+ return val in ("1", "true", "yes", "on", "enable", "enabled")
66
+
67
+
68
# Opt-in flag: VR_DISPLAY=1 starts an Xvfb virtual display for Chrome
# (required on headless Linux such as HuggingFace Spaces).
VR_DISPLAY = _parse_bool_env("VR_DISPLAY", default=False)

_HAS_VIRTUAL_DISPLAY = False   # True once pyvirtualdisplay imports cleanly
_Display = None                # pyvirtualdisplay.Display class (if available)

if VR_DISPLAY:
    try:
        _install("pyvirtualdisplay", "PyVirtualDisplay")
        from pyvirtualdisplay import Display as _Display
        _HAS_VIRTUAL_DISPLAY = True
    except Exception as _vd_err:
        # Soft failure: warn loudly but keep the module importable —
        # the browser transport will raise later if it is actually needed.
        print(
            f"[cloudflare] ⚠ VR_DISPLAY=1 but pyvirtualdisplay failed: {_vd_err}\n"
            f"[cloudflare] Make sure Xvfb is installed: sudo apt install xvfb",
            file=sys.stderr, flush=True,
        )
84
+
85
+
86
+ # ═══════════════════════════════════════════════════════════
87
+ # Β§2 β€” CONSTANTS
88
+ # ═══════════════════════════════════════════════════════════
89
_SITE = "https://playground.ai.cloudflare.com"                     # page origin
_WS_BASE = "wss://playground.ai.cloudflare.com/agents/playground"  # WS endpoint root
_CACHE = Path(__file__).resolve().parent / "cache"                 # on-disk cache dir
_MFILE = _CACHE / "cloudflare_models.json"                         # model catalogue file
_CHARS = string.ascii_letters + string.digits                      # mixed-case ID alphabet
_LOWER = string.ascii_lowercase + string.digits                    # lowercase ID alphabet
# Desktop Chrome user agent — presented on the direct WebSocket connect.
_UA = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/146.0.0.0 Safari/537.36"
)

# Model cache TTL — 6 hours
_CACHE_TTL_SECONDS = 6 * 3600
103
+
104
+
105
+ # ═══════════════════════════════════════════════════════════
106
+ # Β§3 β€” MODEL TABLE (short alias β†’ full @cf/@hf ID)
107
+ # ══════════════════════════════════════════════════════════��
108
+ _SHORT_TO_FULL: dict[str, str] = {
109
+ "gpt-oss-120b": "@cf/openai/gpt-oss-120b",
110
+ "gpt-oss-20b": "@cf/openai/gpt-oss-20b",
111
+ "qwen1.5-0.5b-chat": "@cf/qwen/qwen1.5-0.5b-chat",
112
+ "qwen1.5-1.8b-chat": "@cf/qwen/qwen1.5-1.8b-chat",
113
+ "qwen1.5-7b-chat-awq": "@cf/qwen/qwen1.5-7b-chat-awq",
114
+ "qwen1.5-14b-chat-awq": "@cf/qwen/qwen1.5-14b-chat-awq",
115
+ "qwen2.5-coder-32b-instruct": "@cf/qwen/qwen2.5-coder-32b-instruct",
116
+ "qwq-32b": "@cf/qwen/qwq-32b",
117
+ "qwen3-30b-a3b-fp8": "@cf/qwen/qwen3-30b-a3b-fp8",
118
+ "gemma-2b-it-lora": "@cf/google/gemma-2b-it-lora",
119
+ "gemma-7b-it-lora": "@cf/google/gemma-7b-it-lora",
120
+ "gemma-3-12b-it": "@cf/google/gemma-3-12b-it",
121
+ "gemma-7b-it": "@hf/google/gemma-7b-it",
122
+ "starling-lm-7b-beta": "@hf/nexusflow/starling-lm-7b-beta",
123
+ "llama-3-8b-instruct": "@cf/meta/llama-3-8b-instruct",
124
+ "llama-3-8b-instruct-awq": "@cf/meta/llama-3-8b-instruct-awq",
125
+ "llama-3.2-3b-instruct": "@cf/meta/llama-3.2-3b-instruct",
126
+ "llama-3.2-1b-instruct": "@cf/meta/llama-3.2-1b-instruct",
127
+ "llama-3.2-11b-vision-instruct": "@cf/meta/llama-3.2-11b-vision-instruct",
128
+ "llama-3.3-70b-instruct-fp8-fast": "@cf/meta/llama-3.3-70b-instruct-fp8-fast",
129
+ "llama-3.1-8b-instruct-fp8": "@cf/meta/llama-3.1-8b-instruct-fp8",
130
+ "llama-3.1-8b-instruct-awq": "@cf/meta/llama-3.1-8b-instruct-awq",
131
+ "llama-3.1-70b-instruct": "@cf/meta/llama-3.1-70b-instruct",
132
+ "llama-4-scout-17b-16e-instruct": "@cf/meta/llama-4-scout-17b-16e-instruct",
133
+ "llama-2-7b-chat-fp16": "@cf/meta/llama-2-7b-chat-fp16",
134
+ "llama-2-7b-chat-int8": "@cf/meta/llama-2-7b-chat-int8",
135
+ "llama-2-7b-chat-hf-lora": "@cf/meta-llama/llama-2-7b-chat-hf-lora",
136
+ "llama-guard-3-8b": "@cf/meta/llama-guard-3-8b",
137
+ "mistral-7b-instruct-v0.1": "@cf/mistral/mistral-7b-instruct-v0.1",
138
+ "mistral-7b-instruct-v0.2-lora": "@cf/mistral/mistral-7b-instruct-v0.2-lora",
139
+ "mistral-7b-instruct-v0.2": "@hf/mistral/mistral-7b-instruct-v0.2",
140
+ "mistral-7b-instruct-v0.1-awq": "@hf/thebloke/mistral-7b-instruct-v0.1-awq",
141
+ "mistral-small-3.1-24b-instruct": "@cf/mistralai/mistral-small-3.1-24b-instruct",
142
+ "deepseek-r1-distill-qwen-32b": "@cf/deepseek-ai/deepseek-r1-distill-qwen-32b",
143
+ "deepseek-math-7b-instruct": "@cf/deepseek-ai/deepseek-math-7b-instruct",
144
+ "deepseek-coder-6.7b-base-awq": "@hf/thebloke/deepseek-coder-6.7b-base-awq",
145
+ "deepseek-coder-6.7b-instruct-awq":"@hf/thebloke/deepseek-coder-6.7b-instruct-awq",
146
+ "tinyllama-1.1b-chat-v1.0": "@cf/tinyllama/tinyllama-1.1b-chat-v1.0",
147
+ "falcon-7b-instruct": "@cf/tiiuae/falcon-7b-instruct",
148
+ "hermes-2-pro-mistral-7b": "@hf/nousresearch/hermes-2-pro-mistral-7b",
149
+ "neural-chat-7b-v3-1-awq": "@hf/thebloke/neural-chat-7b-v3-1-awq",
150
+ "openhermes-2.5-mistral-7b-awq": "@hf/thebloke/openhermes-2.5-mistral-7b-awq",
151
+ "openchat-3.5-0106": "@cf/openchat/openchat-3.5-0106",
152
+ "llama-2-13b-chat-awq": "@hf/thebloke/llama-2-13b-chat-awq",
153
+ "zephyr-7b-beta-awq": "@hf/thebloke/zephyr-7b-beta-awq",
154
+ "discolm-german-7b-v1-awq": "@cf/thebloke/discolm-german-7b-v1-awq",
155
+ "una-cybertron-7b-v2-bf16": "@cf/fblgit/una-cybertron-7b-v2-bf16",
156
+ "sqlcoder-7b-2": "@cf/defog/sqlcoder-7b-2",
157
+ "phi-2": "@cf/microsoft/phi-2",
158
+ "nemotron-3-120b-a12b": "@cf/nvidia/nemotron-3-120b-a12b",
159
+ "gemma-sea-lion-v4-27b-it": "@cf/aisingapore/gemma-sea-lion-v4-27b-it",
160
+ "glm-4.7-flash": "@cf/zai-org/glm-4.7-flash",
161
+ "granite-4.0-h-micro": "@cf/ibm-granite/granite-4.0-h-micro",
162
+ "kimi-k2.5": "@cf/moonshotai/kimi-k2.5",
163
+ }
164
+
165
+
166
+ # ═══════════════════════════════════════════════════════════
167
+ # Β§4 β€” HELPERS
168
+ # ═══════════════════════════════════════════════════════════
169
def _rid(n=8):
    """Random mixed-case alphanumeric token of length *n*."""
    return "".join(random.choice(_CHARS) for _ in range(n))
171
+
172
def _rid_lower(n=9):
    """Random lowercase-plus-digit token of length *n*."""
    return "".join(random.choice(_LOWER) for _ in range(n))
174
+
175
def _make_sid():
    """Fresh playground session ID: fixed prefix + 21 random chars."""
    return f"Cloudflare-AI-Playground-{_rid(21)}"
177
+
178
+ def _make_pk():
179
+ return str(uuid.uuid4())
180
+
181
def _make_ws_url(sid, pk):
    """WebSocket endpoint URL for session *sid* carrying page key *pk*."""
    return _WS_BASE + "/" + sid + "?_pk=" + pk
183
+
184
def _asst_id():
    """Assistant-message ID: millisecond timestamp + random suffix."""
    millis = int(time.time() * 1000)
    return f"assistant_{millis}_{_rid_lower(9)}"
186
+
187
+ def _resolve_model(name: str) -> str:
188
+ if not name:
189
+ return name
190
+ if name.startswith("@cf/") or name.startswith("@hf/"):
191
+ return name
192
+ return _SHORT_TO_FULL.get(name, name)
193
+
194
+
195
+ # ═══════════════════════════════════════════════════════════
196
+ # Β§5 β€” CONVERTER + BUILDER
197
+ # ═══════════════════════════════════════════════════════════
198
+ class _Conv:
199
+ @staticmethod
200
+ def to_cf(msgs):
201
+ sys_p, out = "", []
202
+ for m in msgs:
203
+ r, c = m.get("role", ""), m.get("content", "")
204
+ if r == "system":
205
+ sys_p = c
206
+ elif r == "user":
207
+ out.append({
208
+ "role": "user",
209
+ "parts": [{"type": "text", "text": c}],
210
+ "id": _rid(16),
211
+ })
212
+ elif r == "assistant":
213
+ out.append({
214
+ "id": _asst_id(),
215
+ "role": "assistant",
216
+ "parts": [
217
+ {"type": "step-start"},
218
+ {"type": "text", "text": c, "state": "done"},
219
+ ],
220
+ })
221
+ return sys_p, out
222
+
223
+ @staticmethod
224
+ def to_openai(cf_msgs, system=""):
225
+ out = []
226
+ if system:
227
+ out.append({"role": "system", "content": system})
228
+ for m in cf_msgs:
229
+ r = m.get("role", "")
230
+ t = next(
231
+ (p.get("text", "") for p in m.get("parts", [])
232
+ if p.get("type") == "text"),
233
+ "",
234
+ )
235
+ if r in ("user", "assistant") and t:
236
+ out.append({"role": r, "content": t})
237
+ return out
238
+
239
+
240
class _Build:
    """Factories for Cloudflare playground message/request payloads."""

    @staticmethod
    def user(text):
        """Wrap *text* as a Cloudflare user message."""
        return {
            "role": "user",
            "parts": [{"type": "text", "text": text}],
            "id": _rid(16),
        }

    @staticmethod
    def asst(text, reason=""):
        """Wrap an assistant reply, optionally with a reasoning part."""
        parts = [{"type": "step-start"}]
        if reason:
            parts.append({"type": "reasoning", "text": reason, "state": "done"})
        parts.append({"type": "text", "text": text, "state": "done"})
        return {"id": _asst_id(), "role": "assistant", "parts": parts}

    @staticmethod
    def req(msgs):
        """Build a cf_agent_use_chat_request envelope around *msgs*."""
        body = json.dumps(
            {"messages": msgs, "trigger": "submit-message"},
            ensure_ascii=False,
        )
        return {
            "id": _rid(8),
            "init": {"method": "POST", "body": body},
            "type": "cf_agent_use_chat_request",
        }
270
+
271
+
272
+ # ═══════════════════════════════════════════════════════════
273
+ # Β§6 β€” MODEL CACHE (with TTL)
274
+ # ═══════════════════════════════════════════════════════════
275
class _Cache:
    """Disk-backed cache for the Cloudflare model catalogue.

    Stored as JSON in _MFILE with an epoch timestamp so that stale
    entries can be rejected by TTL (_CACHE_TTL_SECONDS by default).
    """

    @staticmethod
    def save(models):
        # Persist the model list with both machine- and human-readable
        # timestamps; the directory is created on first use.
        _CACHE.mkdir(parents=True, exist_ok=True)
        _MFILE.write_text(json.dumps({
            "ts": time.time(),  # epoch for TTL checks
            "ts_human": time.strftime("%Y-%m-%d %H:%M:%S"),
            "models": models,
        }, indent=2, ensure_ascii=False))

    @staticmethod
    def load(ttl: int = _CACHE_TTL_SECONDS):
        """Load cache only if it exists and is within TTL."""
        if not _MFILE.exists():
            return None
        try:
            data = json.loads(_MFILE.read_text())
            age = time.time() - data.get("ts", 0)
            if age > ttl:
                return None  # stale — force refresh
            return data.get("models")
        except Exception:
            # Corrupt or unreadable cache file — treat as a miss.
            return None

    @staticmethod
    def clear():
        # Drop the cache file so the next load() forces a refetch.
        if _MFILE.exists():
            _MFILE.unlink()
303
+
304
+
305
+ # ═══════════════════════════════════════════════════════════
306
+ # Β§6b β€” VIRTUAL DISPLAY MANAGER
307
+ # ═══════════════════════════════════════════════════════════
308
class _VirtualDisplayManager:
    """Thread-safe singleton that manages a single Xvfb display.

    All provider instances share one virtual display; it is started
    lazily by the browser transport and stopped via stop().
    """

    _instance = None           # singleton instance
    _lock = threading.Lock()   # guards creation, start() and stop()

    def __init__(self):
        self._display = None   # pyvirtualdisplay.Display while running
        self._running = False
        # Usable only when VR_DISPLAY=1 AND pyvirtualdisplay imported OK.
        self._enabled = VR_DISPLAY and _HAS_VIRTUAL_DISPLAY

    @classmethod
    def instance(cls) -> "_VirtualDisplayManager":
        # Double-checked locking: cheap unlocked read, lock on first use.
        if cls._instance is None:
            with cls._lock:
                if cls._instance is None:
                    cls._instance = cls()
        return cls._instance

    @property
    def enabled(self) -> bool:
        return self._enabled

    @property
    def running(self) -> bool:
        return self._running

    def start(self, width: int = 1920, height: int = 1080, depth: int = 24):
        """Start Xvfb; no-op when disabled or already running.

        On failure the manager disables itself so callers don't retry
        a hopeless configuration forever.
        """
        if not self._enabled:
            return
        if self._running:
            return

        with self._lock:
            if self._running:  # lost the race — another thread started it
                return
            try:
                self._display = _Display(
                    visible=False,
                    size=(width, height),
                    color_depth=depth,
                    backend="xvfb",
                )
                self._display.start()
                self._running = True
                _log_vd(f"✓ Virtual display started "
                        f"({width}x{height}x{depth}) "
                        f"on :{self._display.display}")
            except FileNotFoundError:
                # Xvfb executable missing on this host.
                _log_vd(
                    "✗ Xvfb binary not found! Install: sudo apt install xvfb"
                )
                self._enabled = False
            except Exception as exc:
                _log_vd(f"✗ Failed to start virtual display: {exc}")
                self._enabled = False

    def stop(self):
        """Stop Xvfb and reset state; safe to call when not running."""
        if not self._running:
            return
        with self._lock:
            if not self._running:
                return
            try:
                if self._display:
                    self._display.stop()
                _log_vd("✓ Virtual display stopped")
            except Exception as exc:
                _log_vd(f"⚠ Error stopping virtual display: {exc}")
            finally:
                # Always reset, even if stop() raised.
                self._display = None
                self._running = False

    def __repr__(self):
        state = "running" if self._running else ("idle" if self._enabled else "disabled")
        disp = f" :{self._display.display}" if self._running and self._display else ""
        return f"VirtualDisplay({state}{disp})"
385
+
386
+
387
+ def _log_vd(msg: str):
388
+ print(f"[cloudflare:vdisplay] {msg}", file=sys.stderr, flush=True)
389
+
390
+
391
+ # ═══════════════════════════════════════════════════════════
392
+ # Β§7 β€” TRANSPORT LAYER
393
+ # ═══════════════════════════════════════════════════════════
394
+
395
+ # ── 7a: Direct Python WebSocket ──────────────────────────
396
class _DirectTransport:
    """Pure Python WS via websocket-client with background recv thread.

    A daemon reader thread buffers inbound frames into self._inbox
    (guarded by self._lock); recv() drains that buffer without blocking.
    """

    def __init__(self, debug=False):
        self._ws = None        # websocket.WebSocket once connected
        self._inbox = []       # buffered inbound frames
        self._lock = threading.Lock()
        self._running = False  # reader-thread loop flag
        self._thread = None
        self._debug = debug

    def connect(self, url: str, cookies: str = "") -> bool:
        """Open the socket with a browser-like UA/Origin and start the reader.

        cookies: optional Cookie header value (e.g. harvested from a
        browser session that already passed the Cloudflare challenge).
        Raises on connection failure (websocket-client exceptions).
        """
        self._ws = _ws_mod.WebSocket()
        headers = [f"User-Agent: {_UA}"]
        if cookies:
            headers.append(f"Cookie: {cookies}")

        self._ws.connect(
            url,
            origin=_SITE,
            header=headers,
            timeout=15,
        )

        self._running = True
        self._thread = threading.Thread(target=self._loop, daemon=True)
        self._thread.start()
        return True

    def _loop(self):
        # Reader thread: short poll timeout so close() is noticed quickly.
        self._ws.settimeout(0.05)
        while self._running:
            try:
                data = self._ws.recv()
                if data:
                    with self._lock:
                        self._inbox.append(data)
            except _ws_mod.WebSocketTimeoutException:
                continue  # idle poll — keep looping
            except _ws_mod.WebSocketConnectionClosedException:
                self._running = False
                break
            except Exception:
                # Any other socket error ends the reader.
                if self._running:
                    self._running = False
                break

    def send(self, data: str) -> bool:
        """Send a text frame; returns False (never raises) on failure."""
        try:
            if self._ws and self._ws.connected:
                self._ws.send(data)
                return True
        except Exception:
            pass
        return False

    def recv(self) -> list[str]:
        """Drain and return all buffered inbound frames."""
        with self._lock:
            msgs = self._inbox[:]
            self._inbox.clear()
        return msgs

    @property
    def alive(self) -> bool:
        # True only while the reader loop runs on a connected socket.
        return self._running and self._ws is not None and self._ws.connected

    def close(self):
        """Stop the reader thread and close the socket (idempotent)."""
        self._running = False
        if self._ws:
            try:
                self._ws.close()
            except Exception:
                pass
            self._ws = None
        if self._thread and self._thread.is_alive():
            self._thread.join(timeout=2)
        self._thread = None
473
+
474
+
475
+ # ── 7b: Browser-based WebSocket (fallback) ───────────────
476
+ _BROWSER_JS = """
477
+ (function(){
478
+ if(window.__cfws) return 'exists';
479
+ window.__cfws = {
480
+ sock:null, alive:false, inbox:[], error:null,
481
+ connect:function(u){
482
+ var s=this; s.error=null; s.alive=false; s.inbox=[];
483
+ s.sock=new WebSocket(u);
484
+ s.sock.onopen=function(){s.alive=true;s.error=null};
485
+ s.sock.onmessage=function(e){s.inbox.push(e.data)};
486
+ s.sock.onerror=function(){s.error='ws_error'};
487
+ s.sock.onclose=function(e){
488
+ s.alive=false;
489
+ if(e.code!==1000&&e.code!==1005)s.error='closed_'+e.code
490
+ };
491
+ },
492
+ send:function(d){
493
+ if(this.sock&&this.sock.readyState===1){
494
+ this.sock.send(typeof d==='string'?d:JSON.stringify(d));
495
+ return true}return false},
496
+ drain:function(){
497
+ if(!this.inbox.length)return null;
498
+ var r=JSON.stringify(this.inbox);this.inbox=[];return r},
499
+ kill:function(){if(this.sock)this.sock.close();this.alive=false}
500
+ };
501
+ return 'ok';
502
+ })();
503
+ """
504
+
505
+
506
class _BrowserTransport:
    """
    Headless-FREE Chrome WebSocket relay via Xvfb virtual display.

    NOTE: We intentionally do NOT use Chrome's --headless flag because
    Cloudflare Playground detects and blocks headless user agents.
    Instead we rely on pyvirtualdisplay / Xvfb to provide a real (but
    invisible) X11 display on servers that have no physical monitor.

    Set VR_DISPLAY=1 before importing to enable this behaviour.
    """

    def __init__(self, debug=False):
        self._page = None   # ChromiumPage once connect() succeeds
        self._debug = debug
        self._vd_mgr = _VirtualDisplayManager.instance()

    def connect(self, url: str, **_) -> bool:
        """Open Chrome on the playground page and bridge a WS to *url*.

        Raises RuntimeError when no browser/display is available and
        ConnectionError when the in-page WebSocket cannot be opened.
        """
        if not _HAS_BROWSER:
            raise RuntimeError(
                "DrissionPage not available — cannot use browser fallback"
            )

        # ── Start virtual display (Xvfb) if enabled ──────
        if self._vd_mgr.enabled and not self._vd_mgr.running:
            self._vd_mgr.start()
            if not self._vd_mgr.running:
                raise RuntimeError(
                    "Virtual display (Xvfb) failed to start. "
                    "Install xvfb: apt-get install -y xvfb"
                )

        # FIX: previously this raised whenever Xvfb was not running, even
        # on machines that DO have a real X display — contradicting the
        # error message's own advice. Honour an existing DISPLAY and only
        # fail when neither Xvfb nor a real display is available.
        if not self._vd_mgr.running and not os.environ.get("DISPLAY"):
            raise RuntimeError(
                "No display available and VR_DISPLAY is not set. "
                "Set VR_DISPLAY=1 to use Xvfb virtual display, "
                "or run on a machine with a real display. "
                "Headless Chrome is intentionally disabled."
            )

        opts = ChromiumOptions()
        opts.set_argument("--disable-blink-features=AutomationControlled")
        opts.set_argument("--no-sandbox")
        opts.set_argument("--disable-dev-shm-usage")
        opts.set_argument("--disable-gpu")
        opts.set_argument("--disable-extensions")
        opts.set_argument("--disable-plugins")
        opts.set_argument("--disable-infobars")
        opts.set_argument("--window-size=1280,720")
        # ── NO headless flag — Cloudflare blocks headless ──

        self._page = ChromiumPage(addr_or_opts=opts)
        self._page.get(_SITE)
        time.sleep(4)  # let the page settle / pass any CF challenge

        # Install the in-page WS relay, then open the socket from page JS.
        self._page.run_js(_BROWSER_JS)
        self._page.run_js(f"window.__cfws.connect('{url}');")

        # Poll until the page-side socket reports open, error, or timeout.
        deadline = time.time() + 15
        while time.time() < deadline:
            if self._page.run_js("return window.__cfws.alive;"):
                return True
            err = self._page.run_js("return window.__cfws.error;")
            if err:
                raise ConnectionError(f"Browser WS failed: {err}")
            time.sleep(0.1)

        raise ConnectionError("Browser WS timed out waiting for connection")

    def send(self, data: str) -> bool:
        """Relay *data* through the in-page WebSocket; True on success."""
        try:
            return bool(
                self._page.run_js(
                    f"return window.__cfws.send({json.dumps(data)});"
                )
            )
        except Exception:
            return False

    def recv(self) -> list[str]:
        """Drain frames buffered by the in-page socket (empty list on error)."""
        try:
            raw = self._page.run_js("return window.__cfws.drain();")
        except Exception:
            return []
        if not raw:
            return []
        try:
            batch = json.loads(raw)
            return batch if isinstance(batch, list) else []
        except (json.JSONDecodeError, TypeError):
            return []

    @property
    def alive(self) -> bool:
        # Reflects the page-side socket's open state; False on any JS error.
        try:
            return bool(self._page.run_js("return window.__cfws.alive;"))
        except Exception:
            return False

    def close(self):
        """Tear down the in-page socket and quit Chrome (best-effort)."""
        if self._page:
            try:
                self._page.run_js("if(window.__cfws) window.__cfws.kill();")
            except Exception:
                pass
            try:
                self._page.quit()
            except Exception:
                pass
            self._page = None
616
+
617
+
618
+ # ═══════════════════════════════════════════════════════════
619
+ # Β§8 β€” PROVIDER
620
+ # ═════════════════���═════════════════════════════════════════
621
+ class CloudflareProvider:
622
+ """
623
+ ☁️ Cloudflare AI Playground β€” fully modular provider.
624
+
625
+ Virtual Display (required on headless servers):
626
+ Set VR_DISPLAY=1 before importing this module.
627
+ This starts Xvfb so Chrome has a real (invisible) display.
628
+ Headless Chrome is intentionally NOT used β€” Cloudflare blocks it.
629
+
630
+ $ export VR_DISPLAY=1
631
+ $ python server.py
632
+
633
+ Usage:
634
+ provider = CloudflareProvider()
635
+ for chunk in provider.chat(data="Hello!"):
636
+ print(chunk, end="")
637
+
638
+ # non-streaming:
639
+ response = provider.ask("What is 2+2?")
640
+ """
641
+
642
    def __init__(
        self,
        model: str = "@cf/moonshotai/kimi-k2.5",
        system: str = "You are a helpful assistant.",
        temperature: float = 1.0,
        max_tokens: Optional[int] = None,
        timeout_init: int = 120,
        timeout_idle: int = 30,
        use_cache: bool = True,
        debug: bool = False,
    ):
        """Connect to the playground and prepare the session.

        model:        short alias or full @cf/@hf model ID
        system:       system prompt pushed with the agent state
        temperature:  sampling temperature
        max_tokens:   None -> filled from the model's context window in _boot()
        timeout_init: seconds — presumably the first-response timeout,
                      consumed by streaming code later in this class (not
                      visible here)
        timeout_idle: seconds — presumably the stream-idle timeout (same note)
        use_cache:    reuse the on-disk model catalogue when fresh
        debug:        verbose stderr logging

        NOTE: connects immediately via _boot() — may launch Chrome/Xvfb.
        """
        self.model = _resolve_model(model)
        self.system = system
        self.temperature = temperature
        self.max_tokens = max_tokens
        self.timeout_init = timeout_init
        self.timeout_idle = timeout_idle
        self.use_cache = use_cache
        self.debug = debug

        # Conversation + catalogue state
        self.history: list[dict] = []
        self.models: list[dict] = []
        self._chat_models: list[dict] = []   # "Text Generation" subset
        self.last_response: str = ""
        self.last_reasoning: str = ""

        # Session / transport state
        self._sid: str = ""                  # agent session ID
        self._pk: str = ""                   # page key for the WS URL
        self._transport = None               # _DirectTransport or _BrowserTransport
        self._mode: str = ""                 # "direct" or "browser"
        self._on: bool = False               # True once connected

        self._boot()
        atexit.register(self.close)  # best-effort cleanup at interpreter exit
676
+
677
+ # ─────────────────────────────────────────────────
678
+ # Logging
679
+ # ─────────────────────────────────────────────────
680
+ def _d(self, *a):
681
+ if self.debug:
682
+ print("[cloudflare]", *a, file=sys.stderr, flush=True)
683
+
684
+ # ─────────────────────────────────────────────────
685
+ # Low-level WS
686
+ # ─────────────────────────────────────────────────
687
+ def _pull(self) -> list[str]:
688
+ msgs = self._transport.recv()
689
+ if self.debug:
690
+ for m in msgs:
691
+ self._d("←", str(m)[:160])
692
+ return msgs
693
+
694
+ def _push(self, obj):
695
+ raw = json.dumps(obj, ensure_ascii=False)
696
+ self._d("β†’", raw[:300])
697
+ if not self._transport.send(raw):
698
+ raise RuntimeError("WebSocket send failed")
699
+
700
+ # ─────────────────────────────────────────────────
701
+ # Boot β€” tries direct WS first, then Xvfb browser
702
+ # ─────────────────────────────────────────────────
703
+ def _boot(self):
704
+ self._sid = _make_sid()
705
+ self._pk = _make_pk()
706
+ url = _make_ws_url(self._sid, self._pk)
707
+
708
+ # ── Attempt 1: direct Python WebSocket ──────
709
+ try:
710
+ self._d("Trying direct Python WebSocket...")
711
+ t = _DirectTransport(debug=self.debug)
712
+ t.connect(url)
713
+
714
+ time.sleep(0.3)
715
+ if t.alive:
716
+ self._transport = t
717
+ self._mode = "direct"
718
+ self._d("βœ“ Direct connection β€” no browser needed!")
719
+ else:
720
+ t.close()
721
+ raise ConnectionError("Direct WS not alive after connect")
722
+
723
+ except Exception as e:
724
+ self._d(f"Direct failed: {e}")
725
+ self._d("Falling back to browser transport (Xvfb)...")
726
+
727
+ # ── Attempt 2: Xvfb + Chrome relay ──────
728
+ try:
729
+ t = _BrowserTransport(debug=self.debug)
730
+ t.connect(url)
731
+ self._transport = t
732
+ self._mode = "browser"
733
+ self._d("βœ“ Browser transport connected")
734
+ vd = _VirtualDisplayManager.instance()
735
+ if vd.running:
736
+ self._d(f" └─ {vd}")
737
+ except Exception as e2:
738
+ raise ConnectionError(
739
+ f"All connection methods failed.\n"
740
+ f" Direct: {e}\n"
741
+ f" Browser: {e2}\n"
742
+ f" Tip: ensure VR_DISPLAY=1 and xvfb is installed."
743
+ ) from e2
744
+
745
+ self._on = True
746
+
747
+ # ── Handshake ──────────────────────────────
748
+ want = {"cf_agent_identity", "cf_agent_state", "cf_agent_mcp_servers"}
749
+ seen = set()
750
+ deadline = time.time() + 10
751
+ while time.time() < deadline and seen < want:
752
+ for raw in self._pull():
753
+ try:
754
+ seen.add(json.loads(raw).get("type", ""))
755
+ except Exception:
756
+ pass
757
+ time.sleep(0.05)
758
+
759
+ self._d(f"Handshake received: {seen}")
760
+
761
+ self._push({"type": "cf_agent_stream_resume_request"})
762
+ time.sleep(0.3)
763
+ self._pull()
764
+
765
+ # ── Models + state ─────────────────────────
766
+ self._load_models()
767
+ if self.max_tokens is None:
768
+ self.max_tokens = self._ctx_window(self.model)
769
+ self._sync()
770
+
771
+ # ─────────────────────────────────────────────────
772
+ # Models
773
+ # ─────────────────────────────────────────────────
774
    def _load_models(self):
        """Populate self.models / self._chat_models, preferring the disk cache.

        A cache hit within the TTL skips the live RPC entirely; a fresh
        fetch is written back to the cache for subsequent processes.
        """
        if self.use_cache:
            cached = _Cache.load()
            if cached:
                self.models = cached
                # Only "Text Generation" models are usable for chat.
                self._chat_models = [
                    m for m in self.models
                    if m.get("task", {}).get("name") == "Text Generation"
                ]
                self._d(f"Loaded {len(self._chat_models)} chat models from cache")
                return
        self._fetch_models()
        if self.models and self.use_cache:
            _Cache.save(self.models)
788
+
789
    def _fetch_models(self):
        """Fetch the model catalog over the live WebSocket via a `getModels` RPC.

        Sends one RPC frame tagged with a fresh UUID, then polls the transport
        for up to 15 seconds for the matching successful `done` reply.  On
        success populates ``self.models`` and filters ``self._chat_models``
        down to "Text Generation" entries; on timeout it only logs a warning
        and leaves both attributes untouched.
        """
        rid = str(uuid.uuid4())  # correlates the reply frame with this request
        self._push({"args": [], "id": rid, "method": "getModels", "type": "rpc"})

        deadline = time.time() + 15
        while time.time() < deadline:
            for raw in self._pull():
                try:
                    d = json.loads(raw)
                except Exception:
                    continue  # ignore non-JSON frames
                # Accept only the completed, successful reply to *our* RPC id.
                if (d.get("type") == "rpc" and d.get("id") == rid
                        and d.get("done") and d.get("success")):
                    self.models = d.get("result", [])
                    self._chat_models = [
                        m for m in self.models
                        if m.get("task", {}).get("name") == "Text Generation"
                    ]
                    self._d(f"Fetched {len(self._chat_models)} chat models")
                    return
            time.sleep(0.05)  # gentle poll while waiting
        self._d("Warning: model fetch timed out")
811
+
812
+ def _ctx_window(self, model_name: str) -> int:
813
+ for m in self._chat_models:
814
+ if m.get("name") == model_name:
815
+ for p in m.get("properties", []):
816
+ if p.get("property_id") == "context_window":
817
+ try:
818
+ return int(p["value"])
819
+ except (ValueError, KeyError):
820
+ pass
821
+ return 4096
822
+
823
+ def _resolve(self, name: str) -> str:
824
+ if not name:
825
+ return name
826
+ if name.startswith("@cf/") or name.startswith("@hf/"):
827
+ return name
828
+ for m in self._chat_models:
829
+ full = m.get("name", "")
830
+ short = full.rsplit("/", 1)[-1]
831
+ if short == name or full == name:
832
+ return full
833
+ return _SHORT_TO_FULL.get(name, name)
834
+
835
+ # ─────────────────────────────────────────────────
836
+ # State Sync
837
+ # ─────────────────────────────────────────────────
838
+ def _sync(self):
839
+ self._push({
840
+ "type": "cf_agent_state",
841
+ "state": {
842
+ "model": self.model,
843
+ "temperature": self.temperature,
844
+ "stream": True,
845
+ "system": self.system,
846
+ "useExternalProvider": False,
847
+ "externalProvider": "openai",
848
+ "authMethod": "provider-key",
849
+ },
850
+ })
851
+ time.sleep(0.15)
852
+ self._pull()
853
+
854
+ # ─────────────────────────────────────────────────
855
+ # Setters
856
+ # ─────────────────────────────────────────────────
857
+ def set_model(self, name: str) -> "CloudflareProvider":
858
+ full = self._resolve(name)
859
+ self.model = full
860
+ self.max_tokens = self._ctx_window(full)
861
+ if self._on:
862
+ self._sync()
863
+ return self
864
+
865
    def set_system(self, prompt: str) -> "CloudflareProvider":
        """Replace the system prompt; syncs to the server when connected."""
        self.system = prompt
        if self._on:
            self._sync()
        return self
870
+
871
+ def set_temperature(self, t: float) -> "CloudflareProvider":
872
+ self.temperature = max(0.0, min(2.0, t))
873
+ if self._on:
874
+ self._sync()
875
+ return self
876
+
877
    def set_max_tokens(self, n: int) -> "CloudflareProvider":
        """Override the response token budget (local only; no server sync)."""
        self.max_tokens = n
        return self
880
+
881
    def clear_history(self):
        """Drop all conversation turns (the system prompt is kept separately)."""
        self.history.clear()
883
+
884
    def get_history(self) -> list[dict]:
        """Return the conversation as OpenAI-style role/content messages."""
        return _Conv.to_openai(self.history, self.system)
886
+
887
+ # ─────────────────────────────────────────────────
888
+ # Model listing
889
+ # ─────────────────────────────────────────────────
890
+ def list_models(self) -> list[dict]:
891
+ return [{
892
+ "name": m.get("name", ""),
893
+ "short": m.get("name", "").rsplit("/", 1)[-1],
894
+ "context": self._ctx_window(m.get("name", "")),
895
+ "active": m.get("name") == self.model,
896
+ } for m in self._chat_models]
897
+
898
+ def refresh_models(self):
899
+ _Cache.clear()
900
+ self._fetch_models()
901
+ if self.models and self.use_cache:
902
+ _Cache.save(self.models)
903
+
904
+ # ═══════════════════════════════════════════════════
905
+ # β˜… CHAT (streaming generator)
906
+ # ═══════════════════════════════════════════════════
907
    def chat(
        self,
        data: str = None,
        messages: list[dict] = None,
        model: str = None,
        temperature: float = None,
        system: str = None,
        max_tokens: int = None,
    ) -> Generator[str, None, None]:
        """Send the pending conversation and stream the reply as text chunks.

        Provide exactly one input form:
          * ``data``     — one user turn, appended to ``self.history``;
          * ``messages`` — a full OpenAI-style conversation that REPLACES
            ``self.history`` (a system message inside it overrides
            ``self.system``).

        ``model`` / ``temperature`` / ``system`` / ``max_tokens`` act as
        per-call overrides; any change is synced to the server before the
        request is sent.

        Yields:
            Text chunks.  Reasoning-model "thinking" is wrapped in literal
            ``<think>...</think>`` markers.  On transport death, timeout, or
            a server-side error with no text received, a bracketed diagnostic
            string is yielded instead of raising mid-stream.
        """
        if not self._on:
            raise RuntimeError("Not connected — call new_session()")
        if not messages and not data:
            raise ValueError("Provide 'messages' or 'data'")

        # ── Apply per-call overrides; remember whether a server sync is due ──
        changed = False

        if model:
            full = self._resolve(model)
            if full != self.model:
                self.model = full
                self.max_tokens = self._ctx_window(full)
                changed = True

        if temperature is not None and temperature != self.temperature:
            self.temperature = max(0.0, min(2.0, temperature))  # clamp to [0, 2]
            changed = True

        if system and system != self.system:
            self.system = system
            changed = True

        if max_tokens is not None:
            # Local budget only — not part of the synced agent state.
            self.max_tokens = max_tokens

        if messages:
            # Full conversation supplied: convert and replace local history.
            sys_p, cf_msgs = _Conv.to_cf(messages)
            if sys_p and sys_p != self.system:
                self.system = sys_p
                changed = True
            self.history = cf_msgs
        else:
            self.history.append(_Build.user(data))

        if changed:
            self._sync()

        # Fire the actual chat request frame.
        self._push(_Build.req(self.history))

        # ── Stream state ─────────────────────────────────────────────
        full_text = ""          # accumulated answer text
        reasoning = ""          # accumulated "thinking" text
        error = None            # last server-reported error, if any
        got_first = False       # has any token arrived yet?
        done = False            # server signalled end-of-response
        last_data = time.time() # for idle/init timeout accounting
        reasoning_open = False  # inside an unclosed <think> block?

        while not done:
            if not self._transport.alive:
                self._d("Transport died mid-stream")
                if not full_text:
                    yield "[Connection lost]\n"
                break

            msgs = self._pull()

            if not msgs:
                # No frames: apply the longer "init" timeout until the first
                # token arrives, then the shorter "idle" timeout between tokens.
                elapsed = time.time() - last_data
                limit = self.timeout_idle if got_first else self.timeout_init
                if elapsed > limit:
                    self._d(f"Timeout after {elapsed:.1f}s")
                    if not full_text:
                        yield "[Timeout — no response received]\n"
                    break
                time.sleep(0.015 if got_first else 0.04)  # tighter poll once streaming
                continue

            last_data = time.time()

            for raw in msgs:
                try:
                    f = json.loads(raw)
                except Exception:
                    continue  # non-JSON frame: ignore

                ftype = f.get("type", "")
                if ftype != "cf_agent_use_chat_response":
                    continue  # unrelated agent frame

                # The payload is a JSON string nested inside the frame.
                body_str = f.get("body", "")
                if body_str:
                    try:
                        b = json.loads(body_str)
                    except Exception:
                        continue

                    bt = b.get("type", "")

                    if bt == "reasoning-start":
                        reasoning_open = True
                        got_first = True
                        yield "<think>\n"

                    elif bt == "reasoning-delta":
                        delta = b.get("delta", "")
                        if delta:
                            reasoning += delta
                            got_first = True
                            yield delta

                    elif bt == "reasoning-end":
                        if reasoning_open:
                            reasoning_open = False
                            yield "\n</think>\n\n"

                    elif bt == "text-delta":
                        # Some models start answering without an explicit
                        # reasoning-end — close the think block ourselves.
                        delta = b.get("delta", "")
                        if reasoning_open:
                            reasoning_open = False
                            yield "\n</think>\n\n"
                        if delta:
                            full_text += delta
                            got_first = True
                            yield delta

                    elif bt == "error":
                        error = b.get("message", str(b))

                if f.get("done", False):
                    done = True
                    break

        # Never leave a dangling <think> tag in the emitted stream.
        if reasoning_open:
            yield "\n</think>\n\n"

        if error:
            self._d("Server error:", error)
            if not full_text:
                yield f"\n[Error: {error}]\n"

        # Record the assistant turn only if it produced actual text.
        if full_text:
            self.history.append(_Build.asst(full_text, reasoning))

        self.last_response = full_text
        self.last_reasoning = reasoning
1051
+
1052
+ # ─────────────────────────────────────────────────
1053
+ # ask() β€” non-streaming convenience
1054
+ # ─────────────────────────────────────────────────
1055
+ def ask(self, prompt: str, **kwargs) -> str:
1056
+ return "".join(self.chat(data=prompt, **kwargs))
1057
+
1058
+ # ─────────────────────────────────────────────────
1059
+ # Session management
1060
+ # ─────────────────────────────────────────────────
1061
+ def new_session(self):
1062
+ self._close_transport()
1063
+ self.history.clear()
1064
+ self._boot()
1065
+
1066
+ def _close_transport(self):
1067
+ if self._transport:
1068
+ try:
1069
+ self._transport.close()
1070
+ except Exception:
1071
+ pass
1072
+ self._transport = None
1073
+ self._on = False
1074
+
1075
+ def close(self):
1076
+ self._close_transport()
1077
+ if self._mode == "browser":
1078
+ vd = _VirtualDisplayManager.instance()
1079
+ vd.stop()
1080
+ self._d("Closed.")
1081
+
1082
    def __enter__(self):
        # Context-manager support: `with CloudflareProvider(...) as p: ...`
        return self
1084
+
1085
    def __exit__(self, *_):
        # Always release the transport (and display) on `with`-block exit.
        self.close()
1087
+
1088
    def __del__(self):
        # Last-resort cleanup. Exceptions are swallowed because interpreter
        # shutdown may already have torn down the modules close() relies on.
        try:
            self.close()
        except Exception:
            pass
1093
+
1094
+ def __repr__(self):
1095
+ s = "βœ…" if self._on else "❌"
1096
+ vd = _VirtualDisplayManager.instance()
1097
+ vd_info = f" vdisplay={vd}" if vd.enabled else ""
1098
+ return (
1099
+ f"CloudflareProvider({s} mode={self._mode!r} "
1100
+ f"model={self.model!r} max_tokens={self.max_tokens}{vd_info})"
1101
+ )
1102
+
1103
+
1104
+ # ═══════════════════════════════════════════════════════════
1105
+ # Β§9 β€” PROCESS-EXIT CLEANUP
1106
+ # ═══════════════════════════════════════════════════════════
1107
def _cleanup_virtual_display():
    """atexit hook: stop the shared Xvfb display so no orphan X server leaks."""
    try:
        vd = _VirtualDisplayManager.instance()
        vd.stop()
    except Exception:
        # Interpreter teardown may already have broken imports; stay silent.
        pass

# Registered unconditionally: stop() is expected to be a no-op when no
# virtual display was ever started.
atexit.register(_cleanup_virtual_display)
requirements.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ─── Web framework ──────────────────────────────
2
+ fastapi>=0.111.0
3
+ uvicorn[standard]>=0.29.0
4
+ pydantic>=2.0.0
5
+
6
+ # ─── Cloudflare provider deps ───────────────────
7
+ websocket-client>=1.8.0
8
+ DrissionPage>=4.1.0
9
+
10
+ # ─── Virtual display (Xvfb wrapper) ─────────────
11
+ PyVirtualDisplay>=3.0
server.py ADDED
@@ -0,0 +1,634 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ╔═══════════════════════════════════════════════════════════════╗
3
+ β•‘ server.py β€” Cloudflare AI REST API β•‘
4
+ β•‘ β•‘
5
+ β•‘ OpenAI-compatible endpoints: β•‘
6
+ β•‘ POST /v1/chat/completions (streaming + non-streaming) β•‘
7
+ β•‘ GET /v1/models β•‘
8
+ β•‘ GET /health β•‘
9
+ β•‘ GET / β•‘
10
+ β•‘ β•‘
11
+ β•‘ Architecture: β•‘
12
+ β•‘ β€’ ProviderPool β€” N pre-warmed WS connections β•‘
13
+ β•‘ β€’ acquire() β€” queue-based fair checkout, auto-heal β•‘
14
+ β•‘ β€’ HealthMonitor β€” periodic background probe + heal β•‘
15
+ ║ ‒ SSE streaming — thread→asyncio bridge via Queue ║
16
+ β•šβ•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•
17
+ """
18
+
19
+ import asyncio
20
+ import json
21
+ import logging
22
+ import os
23
+ import sys
24
+ import threading
25
+ import time
26
+ import uuid
27
+ from contextlib import asynccontextmanager
28
+ from typing import AsyncGenerator, List, Optional
29
+
30
+ import uvicorn
31
+ from fastapi import FastAPI, HTTPException, Request
32
+ from fastapi.middleware.cors import CORSMiddleware
33
+ from fastapi.responses import JSONResponse, StreamingResponse
34
+ from pydantic import BaseModel, Field
35
+
36
+ # ─── Import provider ────────────────────────────────────────
37
+ sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
38
+ from cloudflare_provider import CloudflareProvider
39
+
40
+ # ═══════════════════════════════════════════════════════════
41
+ # LOGGING
42
+ # ═══════════════════════════════════════════════════════════
43
# Log to stdout so the container runtime captures everything.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(levelname)-8s %(message)s",
    stream=sys.stdout,
    datefmt="%H:%M:%S",
)
log = logging.getLogger("cf-api")

# ═══════════════════════════════════════════════════════════
# CONFIG (all tunable via environment variables)
# ═══════════════════════════════════════════════════════════
POOL_SIZE = int(os.getenv("POOL_SIZE", "2"))               # pre-warmed provider slots
PORT = int(os.getenv("PORT", "7860"))                      # 7860 = HF Spaces default
HOST = os.getenv("HOST", "0.0.0.0")
HEALTH_INTERVAL = int(os.getenv("HEALTH_INTERVAL", "60"))  # seconds between pool probes
ACQUIRE_TIMEOUT = int(os.getenv("ACQUIRE_TIMEOUT", "60"))  # max wait for a free slot
STREAM_TIMEOUT = int(os.getenv("STREAM_TIMEOUT", "120"))   # total per-request stream timeout
DEFAULT_MODEL = os.getenv("DEFAULT_MODEL", "@cf/moonshotai/kimi-k2.5")
DEFAULT_SYSTEM = os.getenv("DEFAULT_SYSTEM", "You are a helpful assistant.")
+
63
+
64
+ # ═══════════════════════════════════════════════════════════
65
+ # PYDANTIC SCHEMAS
66
+ # ═══════════════════════════════════════════════════════════
67
+ class Message(BaseModel):
68
+ role: str
69
+ content: str
70
+
71
+ class ChatRequest(BaseModel):
72
+ model: str = DEFAULT_MODEL
73
+ messages: List[Message]
74
+ temperature: float = Field(default=1.0, ge=0.0, le=2.0)
75
+ max_tokens: Optional[int] = None
76
+ stream: bool = True
77
+ system: Optional[str] = None # extra system-prompt override
78
+
79
+ class CompletionChoice(BaseModel):
80
+ index: int
81
+ message: dict
82
+ finish_reason: str
83
+
84
+ class CompletionResponse(BaseModel):
85
+ id: str
86
+ object: str
87
+ created: int
88
+ model: str
89
+ choices: List[CompletionChoice]
90
+ usage: dict
91
+
92
+
93
+ # ═══════════════════════════════════════════════════════════
94
+ # MANAGED PROVIDER (pool slot)
95
+ # ═══════════════════════════════════════════════════════════
96
+ class ManagedProvider:
97
+ """A single pool slot wrapping one CloudflareProvider instance."""
98
+
99
+ def __init__(self, slot_id: int):
100
+ self.slot_id = slot_id
101
+ self.provider: Optional[CloudflareProvider] = None
102
+ self.busy = False
103
+ self.born_at = 0.0
104
+ self.error_count = 0
105
+ self.request_count = 0
106
+
107
+ # ── Health ──────────────────────────────────────
108
+ def is_healthy(self) -> bool:
109
+ if self.provider is None:
110
+ return False
111
+ try:
112
+ return (
113
+ self.provider._on
114
+ and self.provider._transport is not None
115
+ and self.provider._transport.alive
116
+ )
117
+ except Exception:
118
+ return False
119
+
120
+ # ── Teardown ────────────────────────────────────
121
+ def close(self):
122
+ p = self.provider
123
+ self.provider = None
124
+ if p:
125
+ try:
126
+ p.close()
127
+ except Exception:
128
+ pass
129
+
130
+ def __repr__(self):
131
+ state = "busy" if self.busy else ("ok" if self.is_healthy() else "dead")
132
+ mode = self.provider._mode if self.provider else "none"
133
+ return (
134
+ f"<Slot#{self.slot_id} {state} mode={mode!r} "
135
+ f"reqs={self.request_count} errs={self.error_count}>"
136
+ )
137
+
138
+
139
+ # ═══════════════════════════════════════════════════════════
140
+ # PROVIDER POOL
141
+ # ═══════════════════════════════════════════════════════════
142
+ class ProviderPool:
143
+ """
144
+ Pre-warmed pool of CloudflareProvider connections.
145
+
146
+ β€’ initialize() β€” create all slots at startup
147
+ β€’ acquire() β€” async context manager; blocks until a free slot
148
+ β€’ health_monitor β€” background coroutine; heals broken idle slots
149
+ β€’ shutdown() β€” clean teardown
150
+ """
151
+
152
+ def __init__(self, size: int = 2):
153
+ self.size = size
154
+ self._slots: List[ManagedProvider] = []
155
+ self._queue: asyncio.Queue = None # set in initialize()
156
+ self._loop: asyncio.AbstractEventLoop = None
157
+ self._lock = asyncio.Lock()
158
+
159
+ # ─── Startup ──────────────────────────────────
160
+ async def initialize(self):
161
+ self._loop = asyncio.get_event_loop()
162
+ self._queue = asyncio.Queue(maxsize=self.size)
163
+
164
+ log.info(f"πŸš€ Initializing provider pool (slots={self.size})")
165
+
166
+ results = await asyncio.gather(
167
+ *[self._spawn_slot(i) for i in range(self.size)],
168
+ return_exceptions=True,
169
+ )
170
+
171
+ ok = sum(1 for r in results if not isinstance(r, Exception))
172
+ log.info(f" Pool ready β€” {ok}/{self.size} slots healthy")
173
+
174
+ if ok == 0:
175
+ raise RuntimeError(
176
+ "No provider slots could connect. Check network / Xvfb setup."
177
+ )
178
+
179
+ async def _spawn_slot(self, slot_id: int) -> ManagedProvider:
180
+ managed = ManagedProvider(slot_id)
181
+
182
+ def _create() -> CloudflareProvider:
183
+ log.info(f" [S{slot_id}] Connecting...")
184
+ return CloudflareProvider(
185
+ model = DEFAULT_MODEL,
186
+ system = DEFAULT_SYSTEM,
187
+ debug = False,
188
+ use_cache = True,
189
+ )
190
+
191
+ managed.provider = await asyncio.wait_for(
192
+ self._loop.run_in_executor(None, _create),
193
+ timeout=180,
194
+ )
195
+ managed.born_at = time.time()
196
+
197
+ self._slots.append(managed)
198
+ await self._queue.put(managed)
199
+
200
+ mode = managed.provider._mode
201
+ log.info(f" [S{slot_id}] βœ“ Ready mode={mode!r}")
202
+ return managed
203
+
204
+ # ─── Acquire ──────────────────────────────────
205
+ @asynccontextmanager
206
+ async def acquire(self):
207
+ """Checkout a provider, yield it, return on exit (healing if needed)."""
208
+ managed: ManagedProvider = await asyncio.wait_for(
209
+ self._queue.get(),
210
+ timeout=ACQUIRE_TIMEOUT,
211
+ )
212
+ managed.busy = True
213
+ ok = True
214
+
215
+ try:
216
+ # Heal before handing out
217
+ if not managed.is_healthy():
218
+ log.warning(f"[S{managed.slot_id}] Unhealthy β€” healing before use")
219
+ await self._heal(managed)
220
+
221
+ managed.request_count += 1
222
+ yield managed.provider
223
+
224
+ except Exception:
225
+ managed.error_count += 1
226
+ ok = False
227
+ raise
228
+
229
+ finally:
230
+ managed.busy = False
231
+ # After use: return if healthy, else heal in background
232
+ if managed.is_healthy():
233
+ await self._queue.put(managed)
234
+ else:
235
+ log.warning(f"[S{managed.slot_id}] Unhealthy after use β€” background heal")
236
+ asyncio.create_task(self._heal_then_return(managed))
237
+
238
+ # ─── Healing ──────────────────────────────────
239
+ async def _heal(self, managed: ManagedProvider):
240
+ sid = managed.slot_id
241
+
242
+ def _recreate() -> CloudflareProvider:
243
+ managed.close()
244
+ return CloudflareProvider(
245
+ model = DEFAULT_MODEL,
246
+ system = DEFAULT_SYSTEM,
247
+ debug = False,
248
+ use_cache = True,
249
+ )
250
+
251
+ managed.provider = await asyncio.wait_for(
252
+ self._loop.run_in_executor(None, _recreate),
253
+ timeout=180,
254
+ )
255
+ managed.born_at = time.time()
256
+ managed.error_count = 0
257
+ log.info(f"[S{sid}] βœ“ Healed mode={managed.provider._mode!r}")
258
+
259
+ async def _heal_then_return(self, managed: ManagedProvider):
260
+ try:
261
+ await self._heal(managed)
262
+ except Exception as e:
263
+ log.error(f"[S{managed.slot_id}] Heal failed: {e}")
264
+ # Try a brand-new slot as last resort
265
+ try:
266
+ managed.close()
267
+ managed.provider = await asyncio.wait_for(
268
+ self._loop.run_in_executor(
269
+ None,
270
+ lambda: CloudflareProvider(
271
+ model=DEFAULT_MODEL, system=DEFAULT_SYSTEM,
272
+ debug=False, use_cache=True,
273
+ ),
274
+ ),
275
+ timeout=180,
276
+ )
277
+ managed.born_at = time.time()
278
+ managed.error_count = 0
279
+ log.info(f"[S{managed.slot_id}] βœ“ Cold-boot recovery succeeded")
280
+ except Exception as e2:
281
+ log.error(f"[S{managed.slot_id}] Cold-boot also failed: {e2}")
282
+
283
+ await self._queue.put(managed)
284
+
285
+ # ─── Health monitor ───────────────────────────
286
+ async def health_monitor(self):
287
+ """Periodic background coroutine β€” checks and heals idle slots."""
288
+ while True:
289
+ await asyncio.sleep(HEALTH_INTERVAL)
290
+ healthy = sum(1 for m in self._slots if m.is_healthy())
291
+ busy = sum(1 for m in self._slots if m.busy)
292
+ log.info(
293
+ f"β™₯ Health check β€” {healthy}/{self.size} healthy, "
294
+ f"{busy} busy, queue={self._queue.qsize()}"
295
+ )
296
+
297
+ for managed in list(self._slots):
298
+ if not managed.busy and not managed.is_healthy():
299
+ log.warning(f"[S{managed.slot_id}] Idle but unhealthy β€” healing")
300
+ # Pull from queue if it's still there, otherwise skip
301
+ asyncio.create_task(self._heal_then_return(managed))
302
+
303
+ # ─── Status ───────────────────────────────────
304
+ @property
305
+ def status(self) -> dict:
306
+ return {
307
+ "pool_size": self.size,
308
+ "queue_free": self._queue.qsize() if self._queue else 0,
309
+ "slots": [
310
+ {
311
+ "id": m.slot_id,
312
+ "healthy": m.is_healthy(),
313
+ "busy": m.busy,
314
+ "mode": m.provider._mode if m.provider else "none",
315
+ "errors": m.error_count,
316
+ "requests": m.request_count,
317
+ "age_s": round(time.time() - m.born_at, 1) if m.born_at else 0,
318
+ }
319
+ for m in self._slots
320
+ ],
321
+ }
322
+
323
+ # ─── Shutdown ─────────────────────────────────
324
+ async def shutdown(self):
325
+ log.info("Shutting down provider pool...")
326
+ for m in self._slots:
327
+ m.close()
328
+ log.info("Pool shut down.")
329
+
330
+
331
+ # ═══════════════════════════════════════════════════════════
332
+ # GLOBAL POOL REFERENCE
333
+ # ═══════════════════════════════════════════════════════════
334
+ pool: ProviderPool = None
335
+
336
+
337
+ # ═══════════════════════════════════════════════════════════
338
+ # LIFESPAN (startup / shutdown)
339
+ # ═══════════════════════════════════════════════════════════
340
+ @asynccontextmanager
341
+ async def lifespan(app: FastAPI):
342
+ global pool
343
+ pool = ProviderPool(size=POOL_SIZE)
344
+ await pool.initialize()
345
+
346
+ monitor = asyncio.create_task(pool.health_monitor())
347
+ log.info(f"βœ… Server ready on {HOST}:{PORT}")
348
+
349
+ yield
350
+
351
+ monitor.cancel()
352
+ try:
353
+ await monitor
354
+ except asyncio.CancelledError:
355
+ pass
356
+ await pool.shutdown()
357
+
358
+
359
+ # ═══════════════════════════════════════════════════════════
360
+ # FASTAPI APP
361
+ # ═══════════════════════════════════════════════════════════
362
app = FastAPI(
    title = "Cloudflare AI API",
    description = "OpenAI-compatible streaming API via Cloudflare AI Playground",
    version = "1.0.0",
    lifespan = lifespan,
    docs_url = "/docs",
    redoc_url = "/redoc",
)

# Wide-open CORS: any origin, method, and header.
# NOTE(review): acceptable for a public demo; tighten allow_origins before
# fronting anything authenticated.
app.add_middleware(
    CORSMiddleware,
    allow_origins = ["*"],
    allow_methods = ["*"],
    allow_headers = ["*"],
)
+
378
+
379
+ # ═══════════════════════════════════════════════════════════
380
+ # SSE STREAMING HELPERS
381
+ # ═══════════════════════════════════════════════════════════
382
+ def _sse_chunk(content: str, model: str, chunk_id: str) -> str:
383
+ """Format one SSE data line in OpenAI chunk format."""
384
+ payload = {
385
+ "id": chunk_id,
386
+ "object": "chat.completion.chunk",
387
+ "created": int(time.time()),
388
+ "model": model,
389
+ "choices": [{
390
+ "index": 0,
391
+ "delta": {"content": content},
392
+ "finish_reason": None,
393
+ }],
394
+ }
395
+ return f"data: {json.dumps(payload, ensure_ascii=False)}\n\n"
396
+
397
+
398
+ def _sse_done(model: str, chunk_id: str) -> str:
399
+ """Final SSE chunk with finish_reason=stop."""
400
+ payload = {
401
+ "id": chunk_id,
402
+ "object": "chat.completion.chunk",
403
+ "created": int(time.time()),
404
+ "model": model,
405
+ "choices": [{
406
+ "index": 0,
407
+ "delta": {},
408
+ "finish_reason": "stop",
409
+ }],
410
+ }
411
+ return f"data: {json.dumps(payload)}\n\ndata: [DONE]\n\n"
412
+
413
+
414
+ def _sse_error(msg: str) -> str:
415
+ return f"data: {{\"error\": {json.dumps(msg)}}}\n\ndata: [DONE]\n\n"
416
+
417
+
418
+ async def _stream_generator(
419
+ provider: CloudflareProvider,
420
+ req: ChatRequest,
421
+ ) -> AsyncGenerator[str, None]:
422
+ """
423
+ Bridge between the synchronous provider.chat() generator and
424
+ FastAPI's async StreamingResponse.
425
+
426
+ Strategy:
427
+ 1. Spin up a background thread that runs provider.chat() and
428
+ pushes each chunk into an asyncio.Queue.
429
+ 2. Yield SSE-formatted chunks from the queue in the async loop.
430
+ """
431
+ loop = asyncio.get_event_loop()
432
+ q: asyncio.Queue = asyncio.Queue(maxsize=256)
433
+ chunk_id = f"chatcmpl-{uuid.uuid4().hex[:20]}"
434
+ cancel = threading.Event()
435
+
436
+ # Build kwargs for provider
437
+ messages = [{"role": m.role, "content": m.content} for m in req.messages]
438
+ kwargs: dict = {
439
+ "messages": messages,
440
+ "temperature": req.temperature,
441
+ }
442
+ if req.model:
443
+ kwargs["model"] = req.model
444
+ if req.max_tokens:
445
+ kwargs["max_tokens"] = req.max_tokens
446
+ if req.system:
447
+ kwargs["system"] = req.system
448
+
449
+ # ── Worker thread ────────────────────────────
450
+ def _worker():
451
+ try:
452
+ for chunk in provider.chat(**kwargs):
453
+ if cancel.is_set():
454
+ break
455
+ fut = asyncio.run_coroutine_threadsafe(q.put(chunk), loop)
456
+ fut.result(timeout=10) # backpressure: block thread if queue full
457
+ except Exception as exc:
458
+ err = RuntimeError(f"Stream error: {exc}")
459
+ asyncio.run_coroutine_threadsafe(q.put(err), loop).result(timeout=5)
460
+ finally:
461
+ asyncio.run_coroutine_threadsafe(q.put(None), loop).result(timeout=5)
462
+
463
+ t = threading.Thread(target=_worker, daemon=True)
464
+ t.start()
465
+
466
+ # ── Async consumer ────────────────────────────
467
+ try:
468
+ while True:
469
+ item = await asyncio.wait_for(q.get(), timeout=STREAM_TIMEOUT)
470
+
471
+ if item is None: # sentinel β€” stream done
472
+ yield _sse_done(req.model, chunk_id)
473
+ break
474
+
475
+ if isinstance(item, Exception): # error from worker
476
+ yield _sse_error(str(item))
477
+ break
478
+
479
+ if item: # normal text chunk
480
+ yield _sse_chunk(item, req.model, chunk_id)
481
+
482
+ except asyncio.TimeoutError:
483
+ cancel.set()
484
+ yield _sse_error("Stream timed out β€” no data received")
485
+
486
+ finally:
487
+ cancel.set()
488
+ t.join(timeout=5)
489
+
490
+
491
+ # ═══════════════════════════════════════════════════════════
492
+ # ENDPOINTS
493
+ # ═══════════════════════════════════════════════════════════
494
+
495
+ @app.get("/", tags=["Info"])
496
+ async def root():
497
+ return {
498
+ "service": "Cloudflare AI API",
499
+ "version": "1.0.0",
500
+ "status": "running",
501
+ "endpoints": {
502
+ "chat": "POST /v1/chat/completions",
503
+ "models": "GET /v1/models",
504
+ "health": "GET /health",
505
+ "docs": "GET /docs",
506
+ },
507
+ }
508
+
509
+
510
+ @app.get("/health", tags=["Info"])
511
+ async def health():
512
+ if pool is None:
513
+ raise HTTPException(503, detail="Pool not yet initialized")
514
+ healthy = sum(1 for m in pool._slots if m.is_healthy())
515
+ status = "ok" if healthy > 0 else "degraded"
516
+ return JSONResponse(
517
+ content={"status": status, "pool": pool.status},
518
+ status_code=200 if status == "ok" else 206,
519
+ )
520
+
521
+
522
+ @app.get("/v1/models", tags=["Models"])
523
+ async def list_models():
524
+ if pool is None:
525
+ raise HTTPException(503, detail="Pool not initialized")
526
+
527
+ async with pool.acquire() as provider:
528
+ models = await asyncio.get_event_loop().run_in_executor(
529
+ None, provider.list_models
530
+ )
531
+
532
+ return {
533
+ "object": "list",
534
+ "data": [
535
+ {
536
+ "id": m["name"],
537
+ "object": "model",
538
+ "created": 0,
539
+ "owned_by": "cloudflare",
540
+ "context_window": m.get("context", 4096),
541
+ }
542
+ for m in models
543
+ ],
544
+ }
545
+
546
+
547
+ @app.post("/v1/chat/completions", tags=["Chat"])
548
+ async def chat_completions(req: ChatRequest, request: Request):
549
+ if pool is None:
550
+ raise HTTPException(503, detail="Pool not initialized")
551
+
552
+ if not req.messages:
553
+ raise HTTPException(400, detail="`messages` must not be empty")
554
+
555
+ # ── Streaming ──────────────────────────────────────────
556
+ if req.stream:
557
+ async def _gen():
558
+ async with pool.acquire() as provider:
559
+ async for chunk in _stream_generator(provider, req):
560
+ # Check if client disconnected
561
+ if await request.is_disconnected():
562
+ break
563
+ yield chunk
564
+
565
+ return StreamingResponse(
566
+ _gen(),
567
+ media_type = "text/event-stream",
568
+ headers = {
569
+ "Cache-Control": "no-cache",
570
+ "X-Accel-Buffering": "no",
571
+ "Connection": "keep-alive",
572
+ },
573
+ )
574
+
575
+ # ── Non-streaming ──────────────────────────────────────
576
+ messages = [{"role": m.role, "content": m.content} for m in req.messages]
577
+ kwargs: dict = {
578
+ "messages": messages,
579
+ "temperature": req.temperature,
580
+ }
581
+ if req.model:
582
+ kwargs["model"] = req.model
583
+ if req.max_tokens:
584
+ kwargs["max_tokens"] = req.max_tokens
585
+ if req.system:
586
+ kwargs["system"] = req.system
587
+
588
+ loop = asyncio.get_event_loop()
589
+
590
+ async with pool.acquire() as provider:
591
+ full_parts: list[str] = []
592
+
593
+ def _collect():
594
+ for chunk in provider.chat(**kwargs):
595
+ full_parts.append(chunk)
596
+
597
+ await asyncio.wait_for(
598
+ loop.run_in_executor(None, _collect),
599
+ timeout=STREAM_TIMEOUT,
600
+ )
601
+
602
+ response_text = "".join(full_parts)
603
+
604
+ return {
605
+ "id": f"chatcmpl-{uuid.uuid4().hex[:20]}",
606
+ "object": "chat.completion",
607
+ "created": int(time.time()),
608
+ "model": req.model,
609
+ "choices": [{
610
+ "index": 0,
611
+ "message": {"role": "assistant", "content": response_text},
612
+ "finish_reason": "stop",
613
+ }],
614
+ "usage": {
615
+ "prompt_tokens": 0,
616
+ "completion_tokens": 0,
617
+ "total_tokens": 0,
618
+ },
619
+ }
620
+
621
+
622
+ # ═══════════════════════════════════════════════════════════
623
+ # ENTRY POINT
624
+ # ═══════════════════════════════════════════════════════════
625
+ if __name__ == "__main__":
626
+ uvicorn.run(
627
+ "server:app",
628
+ host = HOST,
629
+ port = PORT,
630
+ log_level = "info",
631
+ workers = 1, # single worker β€” state is in-process
632
+ loop = "asyncio",
633
+ timeout_keep_alive = 30,
634
+ )