Upload 4 files
Browse files- Dockerfile +102 -0
- cloudflare_provider.py +1114 -0
- requirements.txt +11 -0
- server.py +634 -0
Dockerfile
ADDED
|
@@ -0,0 +1,102 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 2 |
+
# β Dockerfile β Cloudflare AI API β
|
| 3 |
+
# β β
|
| 4 |
+
# β Stack: Python 3.11 Β· FastAPI Β· Chrome Β· Xvfb β
|
| 5 |
+
# β Port: 7860 (HuggingFace Spaces default) β
|
| 6 |
+
# β β
|
| 7 |
+
# β NOTE: We use Xvfb (virtual framebuffer) instead of β
|
| 8 |
+
# β Chrome --headless because Cloudflare blocks β
|
| 9 |
+
# β headless user agents at the WebSocket level. β
|
| 10 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 11 |
+
|
| 12 |
+
FROM python:3.11-slim
|
| 13 |
+
|
| 14 |
+
# ββ System deps & Xvfb ββββββββββββββββββββββββββββββββββββββββ
|
| 15 |
+
RUN apt-get update && apt-get install -y --no-install-recommends \
|
| 16 |
+
# Xvfb virtual framebuffer
|
| 17 |
+
xvfb \
|
| 18 |
+
# Chrome runtime deps
|
| 19 |
+
wget \
|
| 20 |
+
gnupg \
|
| 21 |
+
ca-certificates \
|
| 22 |
+
libx11-6 \
|
| 23 |
+
libx11-xcb1 \
|
| 24 |
+
libxcb1 \
|
| 25 |
+
libxcomposite1 \
|
| 26 |
+
libxcursor1 \
|
| 27 |
+
libxdamage1 \
|
| 28 |
+
libxext6 \
|
| 29 |
+
libxfixes3 \
|
| 30 |
+
libxi6 \
|
| 31 |
+
libxrandr2 \
|
| 32 |
+
libxrender1 \
|
| 33 |
+
libxss1 \
|
| 34 |
+
libxtst6 \
|
| 35 |
+
libglib2.0-0 \
|
| 36 |
+
libgtk-3-0 \
|
| 37 |
+
libnspr4 \
|
| 38 |
+
libnss3 \
|
| 39 |
+
libatk1.0-0 \
|
| 40 |
+
libatk-bridge2.0-0 \
|
| 41 |
+
libdrm2 \
|
| 42 |
+
libgbm1 \
|
| 43 |
+
libcups2 \
|
| 44 |
+
libasound2 \
|
| 45 |
+
libpango-1.0-0 \
|
| 46 |
+
libpangocairo-1.0-0 \
|
| 47 |
+
fonts-liberation \
|
| 48 |
+
libappindicator3-1 \
|
| 49 |
+
xdg-utils \
|
| 50 |
+
lsb-release \
|
| 51 |
+
&& rm -rf /var/lib/apt/lists/*
|
| 52 |
+
|
| 53 |
+
# ββ Google Chrome stable βββββββββββββββββββββββββββββββββββββββ
|
| 54 |
+
RUN wget -q -O /tmp/chrome.deb \
|
| 55 |
+
https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb \
|
| 56 |
+
&& apt-get update \
|
| 57 |
+
&& apt-get install -y --no-install-recommends /tmp/chrome.deb \
|
| 58 |
+
&& rm -f /tmp/chrome.deb \
|
| 59 |
+
&& rm -rf /var/lib/apt/lists/* \
|
| 60 |
+
# Verify Chrome installed
|
| 61 |
+
&& google-chrome --version
|
| 62 |
+
|
| 63 |
+
# ββ Working directory ββββββββββββββββββββββββββββββββββββββββββ
|
| 64 |
+
WORKDIR /app
|
| 65 |
+
|
| 66 |
+
# ββ Python deps ββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 67 |
+
COPY requirements.txt .
|
| 68 |
+
RUN pip install --no-cache-dir --upgrade pip \
|
| 69 |
+
&& pip install --no-cache-dir -r requirements.txt
|
| 70 |
+
|
| 71 |
+
# ββ App source βββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 72 |
+
COPY cloudflare_provider.py .
|
| 73 |
+
COPY server.py .
|
| 74 |
+
|
| 75 |
+
# ββ Cache directory ββββββββββββββββββββββββββββββββββββββββββββ
|
| 76 |
+
RUN mkdir -p /app/cache
|
| 77 |
+
|
| 78 |
+
# ββ Non-root user (HuggingFace Spaces requirement) βββββββββββββ
|
| 79 |
+
RUN useradd -m -u 1000 appuser \
|
| 80 |
+
&& chown -R appuser:appuser /app
|
| 81 |
+
USER appuser
|
| 82 |
+
|
| 83 |
+
# ββ Environment ββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 84 |
+
ENV PYTHONUNBUFFERED=1 \
|
| 85 |
+
PYTHONDONTWRITEBYTECODE=1 \
|
| 86 |
+
# Enable Xvfb virtual display β REQUIRED (no headless Chrome)
|
| 87 |
+
VR_DISPLAY=1 \
|
| 88 |
+
# Pool: 2 pre-warmed connections
|
| 89 |
+
POOL_SIZE=2 \
|
| 90 |
+
# Port
|
| 91 |
+
PORT=7860 \
|
| 92 |
+
HOST=0.0.0.0 \
|
| 93 |
+
# Health monitor interval (seconds)
|
| 94 |
+
HEALTH_INTERVAL=60 \
|
| 95 |
+
# Default model
|
| 96 |
+
DEFAULT_MODEL=@cf/moonshotai/kimi-k2.5
|
| 97 |
+
|
| 98 |
+
# ββ Expose βββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 99 |
+
EXPOSE 7860
|
| 100 |
+
|
| 101 |
+
# ββ Start server βββββββββββββββββββββββββββββββββββββββββββββββ
|
| 102 |
+
CMD ["python", "server.py"]
|
cloudflare_provider.py
ADDED
|
@@ -0,0 +1,1114 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 3 |
+
β cloudflare_provider.py β
|
| 4 |
+
β Cloudflare AI Playground β Reverse Engineered Provider β
|
| 5 |
+
β β
|
| 6 |
+
β Connection Strategy: β
|
| 7 |
+
β 1. Try DIRECT Python WebSocket (no browser needed) β
|
| 8 |
+
β 2. If blocked β launch browser with Xvfb virtual display β
|
| 9 |
+
β extract cookies β reconnect with cookies via Python WS β
|
| 10 |
+
β 3. If still blocked β keep browser as WS relay β
|
| 11 |
+
β β
|
| 12 |
+
β Virtual Display: β
|
| 13 |
+
β Set env var VR_DISPLAY=1 to auto-start Xvfb via β
|
| 14 |
+
β pyvirtualdisplay (required on headless Linux / HF Spaces). β
|
| 15 |
+
β β
|
| 16 |
+
β NOTE: Headless Chrome is intentionally disabled β β
|
| 17 |
+
β Cloudflare blocks headless user agents. β
|
| 18 |
+
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 19 |
+
"""
|
| 20 |
+
|
| 21 |
+
import atexit
|
| 22 |
+
import json
|
| 23 |
+
import os
|
| 24 |
+
import sys
|
| 25 |
+
import time
|
| 26 |
+
import uuid
|
| 27 |
+
import random
|
| 28 |
+
import string
|
| 29 |
+
import threading
|
| 30 |
+
from pathlib import Path
|
| 31 |
+
from typing import Generator, Optional
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 35 |
+
# Β§1 β AUTO INSTALL
|
| 36 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 37 |
+
def _install(pkg, pip_name=None):
    """Ensure *pkg* is importable; quietly pip-install it when missing.

    ``pip_name`` overrides the name given to pip when it differs from the
    import name (e.g. import ``websocket`` but install ``websocket-client``).
    Raises ``subprocess.CalledProcessError`` if the install fails.
    """
    try:
        __import__(pkg)
        return
    except ImportError:
        pass
    import subprocess
    target = pip_name if pip_name else pkg
    cmd = [sys.executable, "-m", "pip", "install", "-q", target]
    subprocess.check_call(cmd)
|
| 45 |
+
|
| 46 |
+
_install("websocket", "websocket-client")
|
| 47 |
+
import websocket as _ws_mod
|
| 48 |
+
|
| 49 |
+
_HAS_BROWSER = False
|
| 50 |
+
try:
|
| 51 |
+
_install("DrissionPage")
|
| 52 |
+
from DrissionPage import ChromiumPage, ChromiumOptions
|
| 53 |
+
_HAS_BROWSER = True
|
| 54 |
+
except Exception:
|
| 55 |
+
pass
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 59 |
+
# Β§1b β VIRTUAL DISPLAY (from OS env)
|
| 60 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 61 |
+
def _parse_bool_env(key: str, default: bool = False) -> bool:
    """Interpret environment variable *key* as a boolean flag.

    Unset / empty / whitespace-only values yield *default*; otherwise the
    value is truthy only if it matches a known "on" token (case-insensitive).
    """
    raw = os.environ.get(key, "")
    normalized = raw.strip().lower()
    if normalized == "":
        return default
    truthy_tokens = {"1", "true", "yes", "on", "enable", "enabled"}
    return normalized in truthy_tokens
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
VR_DISPLAY = _parse_bool_env("VR_DISPLAY", default=False)
|
| 69 |
+
|
| 70 |
+
_HAS_VIRTUAL_DISPLAY = False
|
| 71 |
+
_Display = None
|
| 72 |
+
|
| 73 |
+
if VR_DISPLAY:
|
| 74 |
+
try:
|
| 75 |
+
_install("pyvirtualdisplay", "PyVirtualDisplay")
|
| 76 |
+
from pyvirtualdisplay import Display as _Display
|
| 77 |
+
_HAS_VIRTUAL_DISPLAY = True
|
| 78 |
+
except Exception as _vd_err:
|
| 79 |
+
print(
|
| 80 |
+
f"[cloudflare] β VR_DISPLAY=1 but pyvirtualdisplay failed: {_vd_err}\n"
|
| 81 |
+
f"[cloudflare] Make sure Xvfb is installed: sudo apt install xvfb",
|
| 82 |
+
file=sys.stderr, flush=True,
|
| 83 |
+
)
|
| 84 |
+
|
| 85 |
+
|
| 86 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 87 |
+
# Β§2 β CONSTANTS
|
| 88 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 89 |
+
_SITE = "https://playground.ai.cloudflare.com"
|
| 90 |
+
_WS_BASE = "wss://playground.ai.cloudflare.com/agents/playground"
|
| 91 |
+
_CACHE = Path(__file__).resolve().parent / "cache"
|
| 92 |
+
_MFILE = _CACHE / "cloudflare_models.json"
|
| 93 |
+
_CHARS = string.ascii_letters + string.digits
|
| 94 |
+
_LOWER = string.ascii_lowercase + string.digits
|
| 95 |
+
_UA = (
|
| 96 |
+
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
|
| 97 |
+
"AppleWebKit/537.36 (KHTML, like Gecko) "
|
| 98 |
+
"Chrome/146.0.0.0 Safari/537.36"
|
| 99 |
+
)
|
| 100 |
+
|
| 101 |
+
# Model cache TTL β 6 hours
|
| 102 |
+
_CACHE_TTL_SECONDS = 6 * 3600
|
| 103 |
+
|
| 104 |
+
|
| 105 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 106 |
+
# Β§3 β MODEL TABLE (short alias β full @cf/@hf ID)
|
| 107 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 108 |
+
_SHORT_TO_FULL: dict[str, str] = {
|
| 109 |
+
"gpt-oss-120b": "@cf/openai/gpt-oss-120b",
|
| 110 |
+
"gpt-oss-20b": "@cf/openai/gpt-oss-20b",
|
| 111 |
+
"qwen1.5-0.5b-chat": "@cf/qwen/qwen1.5-0.5b-chat",
|
| 112 |
+
"qwen1.5-1.8b-chat": "@cf/qwen/qwen1.5-1.8b-chat",
|
| 113 |
+
"qwen1.5-7b-chat-awq": "@cf/qwen/qwen1.5-7b-chat-awq",
|
| 114 |
+
"qwen1.5-14b-chat-awq": "@cf/qwen/qwen1.5-14b-chat-awq",
|
| 115 |
+
"qwen2.5-coder-32b-instruct": "@cf/qwen/qwen2.5-coder-32b-instruct",
|
| 116 |
+
"qwq-32b": "@cf/qwen/qwq-32b",
|
| 117 |
+
"qwen3-30b-a3b-fp8": "@cf/qwen/qwen3-30b-a3b-fp8",
|
| 118 |
+
"gemma-2b-it-lora": "@cf/google/gemma-2b-it-lora",
|
| 119 |
+
"gemma-7b-it-lora": "@cf/google/gemma-7b-it-lora",
|
| 120 |
+
"gemma-3-12b-it": "@cf/google/gemma-3-12b-it",
|
| 121 |
+
"gemma-7b-it": "@hf/google/gemma-7b-it",
|
| 122 |
+
"starling-lm-7b-beta": "@hf/nexusflow/starling-lm-7b-beta",
|
| 123 |
+
"llama-3-8b-instruct": "@cf/meta/llama-3-8b-instruct",
|
| 124 |
+
"llama-3-8b-instruct-awq": "@cf/meta/llama-3-8b-instruct-awq",
|
| 125 |
+
"llama-3.2-3b-instruct": "@cf/meta/llama-3.2-3b-instruct",
|
| 126 |
+
"llama-3.2-1b-instruct": "@cf/meta/llama-3.2-1b-instruct",
|
| 127 |
+
"llama-3.2-11b-vision-instruct": "@cf/meta/llama-3.2-11b-vision-instruct",
|
| 128 |
+
"llama-3.3-70b-instruct-fp8-fast": "@cf/meta/llama-3.3-70b-instruct-fp8-fast",
|
| 129 |
+
"llama-3.1-8b-instruct-fp8": "@cf/meta/llama-3.1-8b-instruct-fp8",
|
| 130 |
+
"llama-3.1-8b-instruct-awq": "@cf/meta/llama-3.1-8b-instruct-awq",
|
| 131 |
+
"llama-3.1-70b-instruct": "@cf/meta/llama-3.1-70b-instruct",
|
| 132 |
+
"llama-4-scout-17b-16e-instruct": "@cf/meta/llama-4-scout-17b-16e-instruct",
|
| 133 |
+
"llama-2-7b-chat-fp16": "@cf/meta/llama-2-7b-chat-fp16",
|
| 134 |
+
"llama-2-7b-chat-int8": "@cf/meta/llama-2-7b-chat-int8",
|
| 135 |
+
"llama-2-7b-chat-hf-lora": "@cf/meta-llama/llama-2-7b-chat-hf-lora",
|
| 136 |
+
"llama-guard-3-8b": "@cf/meta/llama-guard-3-8b",
|
| 137 |
+
"mistral-7b-instruct-v0.1": "@cf/mistral/mistral-7b-instruct-v0.1",
|
| 138 |
+
"mistral-7b-instruct-v0.2-lora": "@cf/mistral/mistral-7b-instruct-v0.2-lora",
|
| 139 |
+
"mistral-7b-instruct-v0.2": "@hf/mistral/mistral-7b-instruct-v0.2",
|
| 140 |
+
"mistral-7b-instruct-v0.1-awq": "@hf/thebloke/mistral-7b-instruct-v0.1-awq",
|
| 141 |
+
"mistral-small-3.1-24b-instruct": "@cf/mistralai/mistral-small-3.1-24b-instruct",
|
| 142 |
+
"deepseek-r1-distill-qwen-32b": "@cf/deepseek-ai/deepseek-r1-distill-qwen-32b",
|
| 143 |
+
"deepseek-math-7b-instruct": "@cf/deepseek-ai/deepseek-math-7b-instruct",
|
| 144 |
+
"deepseek-coder-6.7b-base-awq": "@hf/thebloke/deepseek-coder-6.7b-base-awq",
|
| 145 |
+
"deepseek-coder-6.7b-instruct-awq":"@hf/thebloke/deepseek-coder-6.7b-instruct-awq",
|
| 146 |
+
"tinyllama-1.1b-chat-v1.0": "@cf/tinyllama/tinyllama-1.1b-chat-v1.0",
|
| 147 |
+
"falcon-7b-instruct": "@cf/tiiuae/falcon-7b-instruct",
|
| 148 |
+
"hermes-2-pro-mistral-7b": "@hf/nousresearch/hermes-2-pro-mistral-7b",
|
| 149 |
+
"neural-chat-7b-v3-1-awq": "@hf/thebloke/neural-chat-7b-v3-1-awq",
|
| 150 |
+
"openhermes-2.5-mistral-7b-awq": "@hf/thebloke/openhermes-2.5-mistral-7b-awq",
|
| 151 |
+
"openchat-3.5-0106": "@cf/openchat/openchat-3.5-0106",
|
| 152 |
+
"llama-2-13b-chat-awq": "@hf/thebloke/llama-2-13b-chat-awq",
|
| 153 |
+
"zephyr-7b-beta-awq": "@hf/thebloke/zephyr-7b-beta-awq",
|
| 154 |
+
"discolm-german-7b-v1-awq": "@cf/thebloke/discolm-german-7b-v1-awq",
|
| 155 |
+
"una-cybertron-7b-v2-bf16": "@cf/fblgit/una-cybertron-7b-v2-bf16",
|
| 156 |
+
"sqlcoder-7b-2": "@cf/defog/sqlcoder-7b-2",
|
| 157 |
+
"phi-2": "@cf/microsoft/phi-2",
|
| 158 |
+
"nemotron-3-120b-a12b": "@cf/nvidia/nemotron-3-120b-a12b",
|
| 159 |
+
"gemma-sea-lion-v4-27b-it": "@cf/aisingapore/gemma-sea-lion-v4-27b-it",
|
| 160 |
+
"glm-4.7-flash": "@cf/zai-org/glm-4.7-flash",
|
| 161 |
+
"granite-4.0-h-micro": "@cf/ibm-granite/granite-4.0-h-micro",
|
| 162 |
+
"kimi-k2.5": "@cf/moonshotai/kimi-k2.5",
|
| 163 |
+
}
|
| 164 |
+
|
| 165 |
+
|
| 166 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 167 |
+
# Β§4 β HELPERS
|
| 168 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 169 |
+
def _rid(n=8):
    """Random mixed-case alphanumeric identifier, *n* characters long."""
    picked = random.choices(_CHARS, k=n)
    return "".join(picked)


def _rid_lower(n=9):
    """Random lowercase alphanumeric identifier, *n* characters long."""
    picked = random.choices(_LOWER, k=n)
    return "".join(picked)


def _make_sid():
    """Session ID in the playground's expected naming format."""
    prefix = "Cloudflare-AI-Playground-"
    return prefix + _rid(21)


def _make_pk():
    """Fresh UUID4 string used as the ``_pk`` query parameter."""
    return f"{uuid.uuid4()}"


def _make_ws_url(sid, pk):
    """Compose the playground WebSocket endpoint URL for a session."""
    return "{0}/{1}?_pk={2}".format(_WS_BASE, sid, pk)


def _asst_id():
    """Assistant message ID: millisecond timestamp + random suffix."""
    millis = int(time.time() * 1000)
    suffix = _rid_lower(9)
    return f"assistant_{millis}_{suffix}"
|
| 186 |
+
|
| 187 |
+
def _resolve_model(name: str) -> str:
    """Expand a short model alias to its full ``@cf/``/``@hf/`` identifier.

    Falsy input and already-full IDs pass through unchanged; unknown
    aliases are returned as-is rather than raising.
    """
    if not name:
        return name
    full_prefixes = ("@cf/", "@hf/")
    if name.startswith(full_prefixes):
        return name
    return _SHORT_TO_FULL.get(name, name)
|
| 193 |
+
|
| 194 |
+
|
| 195 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 196 |
+
# Β§5 β CONVERTER + BUILDER
|
| 197 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 198 |
+
class _Conv:
    """Convert between OpenAI-style messages and Cloudflare playground format."""

    @staticmethod
    def to_cf(msgs):
        """Split OpenAI messages into ``(system_prompt, cf_message_list)``.

        The last ``system`` message wins; ``user``/``assistant`` messages are
        wrapped in the playground's parts-based structure; other roles drop.
        """
        system_prompt = ""
        converted = []
        for msg in msgs:
            role = msg.get("role", "")
            content = msg.get("content", "")
            if role == "system":
                system_prompt = content
                continue
            if role == "user":
                converted.append({
                    "role": "user",
                    "parts": [{"type": "text", "text": content}],
                    "id": _rid(16),
                })
            elif role == "assistant":
                converted.append({
                    "id": _asst_id(),
                    "role": "assistant",
                    "parts": [
                        {"type": "step-start"},
                        {"type": "text", "text": content, "state": "done"},
                    ],
                })
        return system_prompt, converted

    @staticmethod
    def to_openai(cf_msgs, system=""):
        """Convert Cloudflare playground messages back to an OpenAI list.

        Prepends *system* (when non-empty) and keeps only user/assistant
        messages that contain a non-empty first text part.
        """
        result = [{"role": "system", "content": system}] if system else []
        for msg in cf_msgs:
            role = msg.get("role", "")
            text = ""
            for part in msg.get("parts", []):
                if part.get("type") == "text":
                    text = part.get("text", "")
                    break
            if role in ("user", "assistant") and text:
                result.append({"role": role, "content": text})
        return result
|
| 238 |
+
|
| 239 |
+
|
| 240 |
+
class _Build:
    """Builders for Cloudflare playground message and request payloads."""

    @staticmethod
    def user(text):
        """Build a user message entry with a fresh random ID."""
        return {
            "role": "user",
            "parts": [{"type": "text", "text": text}],
            "id": _rid(16),
        }

    @staticmethod
    def asst(text, reason=""):
        """Build an assistant message; insert a reasoning part when given."""
        parts = [{"type": "step-start"}]
        if reason:
            parts.append({"type": "reasoning", "text": reason, "state": "done"})
        parts.append({"type": "text", "text": text, "state": "done"})
        return {"id": _asst_id(), "role": "assistant", "parts": parts}

    @staticmethod
    def req(msgs):
        """Wrap *msgs* in a ``cf_agent_use_chat_request`` envelope.

        The message list is JSON-encoded (non-ASCII preserved) into the
        request body, mirroring the playground's client behaviour.
        """
        body = json.dumps(
            {"messages": msgs, "trigger": "submit-message"},
            ensure_ascii=False,
        )
        return {
            "id": _rid(8),
            "init": {"method": "POST", "body": body},
            "type": "cf_agent_use_chat_request",
        }
|
| 270 |
+
|
| 271 |
+
|
| 272 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 273 |
+
# Β§6 β MODEL CACHE (with TTL)
|
| 274 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 275 |
+
class _Cache:
    """Disk cache for the discovered model list, with a freshness TTL."""

    @staticmethod
    def save(models):
        """Persist *models* plus write timestamps to the cache file."""
        _CACHE.mkdir(parents=True, exist_ok=True)
        payload = {
            "ts": time.time(),  # epoch seconds, used for TTL checks
            "ts_human": time.strftime("%Y-%m-%d %H:%M:%S"),
            "models": models,
        }
        _MFILE.write_text(json.dumps(payload, indent=2, ensure_ascii=False))

    @staticmethod
    def load(ttl: int = _CACHE_TTL_SECONDS):
        """Load cache only if it exists and is within TTL.

        Returns ``None`` when the file is missing, unreadable, corrupt,
        or older than *ttl* seconds (forcing a refresh by the caller).
        """
        if not _MFILE.exists():
            return None
        try:
            data = json.loads(_MFILE.read_text())
            if time.time() - data.get("ts", 0) > ttl:
                return None  # stale — force refresh
            return data.get("models")
        except Exception:
            return None

    @staticmethod
    def clear():
        """Delete the cache file if present."""
        if _MFILE.exists():
            _MFILE.unlink()
|
| 303 |
+
|
| 304 |
+
|
| 305 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 306 |
+
# Β§6b β VIRTUAL DISPLAY MANAGER
|
| 307 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 308 |
+
class _VirtualDisplayManager:
    """Thread-safe singleton that manages a single Xvfb display."""

    # Class-level singleton slot and lock. NOTE: ``self._lock`` below
    # resolves to this same class attribute, so start/stop transitions
    # are serialized across all instances (there is only ever one).
    _instance = None
    _lock = threading.Lock()

    def __init__(self):
        self._display = None      # pyvirtualdisplay.Display once started
        self._running = False     # True while Xvfb is up
        # Usable only when VR_DISPLAY=1 AND pyvirtualdisplay imported OK
        # at module load; flips to False permanently on a start failure.
        self._enabled = VR_DISPLAY and _HAS_VIRTUAL_DISPLAY

    @classmethod
    def instance(cls) -> "_VirtualDisplayManager":
        """Return the process-wide manager, creating it on first use."""
        # Double-checked locking: cheap unlocked read, lock only on the
        # first call to avoid racing concurrent creators.
        if cls._instance is None:
            with cls._lock:
                if cls._instance is None:
                    cls._instance = cls()
        return cls._instance

    @property
    def enabled(self) -> bool:
        # False when VR_DISPLAY is off, pyvirtualdisplay is missing,
        # or a previous start attempt failed.
        return self._enabled

    @property
    def running(self) -> bool:
        return self._running

    def start(self, width: int = 1920, height: int = 1080, depth: int = 24):
        """Start Xvfb once; no-op when disabled or already running.

        On failure the manager disables itself so later callers fall
        through instead of retrying a known-broken Xvfb setup.
        """
        if not self._enabled:
            return
        if self._running:
            return

        with self._lock:
            # Re-check under the lock: another thread may have started
            # the display between the unlocked check and acquiring it.
            if self._running:
                return
            try:
                self._display = _Display(
                    visible=False,
                    size=(width, height),
                    color_depth=depth,
                    backend="xvfb",
                )
                self._display.start()
                self._running = True
                _log_vd(f"β Virtual display started "
                        f"({width}x{height}x{depth}) "
                        f"on :{self._display.display}")
            except FileNotFoundError:
                # Xvfb executable missing entirely — not recoverable here.
                _log_vd(
                    "β Xvfb binary not found! Install: sudo apt install xvfb"
                )
                self._enabled = False
            except Exception as exc:
                _log_vd(f"β Failed to start virtual display: {exc}")
                self._enabled = False

    def stop(self):
        """Stop Xvfb; always clears state even if the stop call errors."""
        if not self._running:
            return
        with self._lock:
            if not self._running:
                return
            try:
                if self._display:
                    self._display.stop()
                _log_vd("β Virtual display stopped")
            except Exception as exc:
                _log_vd(f"β Error stopping virtual display: {exc}")
            finally:
                # Reset unconditionally so a failed stop cannot wedge the
                # manager in a permanently "running" state.
                self._display = None
                self._running = False

    def __repr__(self):
        state = "running" if self._running else ("idle" if self._enabled else "disabled")
        disp = f" :{self._display.display}" if self._running and self._display else ""
        return f"VirtualDisplay({state}{disp})"
|
| 385 |
+
|
| 386 |
+
|
| 387 |
+
def _log_vd(msg: str) -> None:
    """Emit a tagged virtual-display log line to stderr, unbuffered."""
    line = f"[cloudflare:vdisplay] {msg}"
    print(line, file=sys.stderr, flush=True)
|
| 389 |
+
|
| 390 |
+
|
| 391 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 392 |
+
# Β§7 β TRANSPORT LAYER
|
| 393 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 394 |
+
|
| 395 |
+
# ββ 7a: Direct Python WebSocket ββββββββββββββββββββββββββ
|
| 396 |
+
class _DirectTransport:
    """Pure Python WS via websocket-client with background recv thread."""

    def __init__(self, debug=False):
        self._ws = None                    # websocket.WebSocket after connect()
        self._inbox = []                   # received frames, guarded by _lock
        self._lock = threading.Lock()      # protects _inbox only
        self._running = False              # recv-loop keep-alive flag
        self._thread = None                # daemon thread running _loop
        self._debug = debug

    def connect(self, url: str, cookies: str = "") -> bool:
        """Open the WebSocket and start the background receive thread.

        Sends a browser-like User-Agent and (optionally) a Cookie header;
        the Origin header matches the playground site so the handshake
        looks like an in-page connection. Raises on connection failure.
        """
        self._ws = _ws_mod.WebSocket()
        headers = [f"User-Agent: {_UA}"]
        if cookies:
            headers.append(f"Cookie: {cookies}")

        self._ws.connect(
            url,
            origin=_SITE,
            header=headers,
            timeout=15,
        )

        self._running = True
        # Daemon thread: must not block interpreter shutdown.
        self._thread = threading.Thread(target=self._loop, daemon=True)
        self._thread.start()
        return True

    def _loop(self):
        """Background receiver: drain frames into _inbox until closed."""
        # Short recv timeout so the loop re-checks _running ~20x/sec
        # and close() is honored promptly.
        self._ws.settimeout(0.05)
        while self._running:
            try:
                data = self._ws.recv()
                if data:
                    with self._lock:
                        self._inbox.append(data)
            except _ws_mod.WebSocketTimeoutException:
                continue  # no frame yet — poll again
            except _ws_mod.WebSocketConnectionClosedException:
                self._running = False
                break
            except Exception:
                # Any other socket error terminates the loop; the
                # _running check avoids clobbering a deliberate close().
                if self._running:
                    self._running = False
                break

    def send(self, data: str) -> bool:
        """Send a text frame; return False (never raise) on any failure."""
        try:
            if self._ws and self._ws.connected:
                self._ws.send(data)
                return True
        except Exception:
            pass
        return False

    def recv(self) -> list[str]:
        """Drain and return all frames buffered since the last call."""
        with self._lock:
            msgs = self._inbox[:]
            self._inbox.clear()
        return msgs

    @property
    def alive(self) -> bool:
        # True only while the loop runs AND the socket reports connected.
        return self._running and self._ws is not None and self._ws.connected

    def close(self):
        """Stop the receive loop and close the socket (idempotent)."""
        self._running = False
        if self._ws:
            try:
                self._ws.close()
            except Exception:
                pass
        self._ws = None
        # Give the recv thread a bounded window to observe _running=False.
        if self._thread and self._thread.is_alive():
            self._thread.join(timeout=2)
        self._thread = None
|
| 473 |
+
|
| 474 |
+
|
| 475 |
+
# ββ 7b: Browser-based WebSocket (fallback) βββββββββββββββ
|
| 476 |
+
_BROWSER_JS = """
|
| 477 |
+
(function(){
|
| 478 |
+
if(window.__cfws) return 'exists';
|
| 479 |
+
window.__cfws = {
|
| 480 |
+
sock:null, alive:false, inbox:[], error:null,
|
| 481 |
+
connect:function(u){
|
| 482 |
+
var s=this; s.error=null; s.alive=false; s.inbox=[];
|
| 483 |
+
s.sock=new WebSocket(u);
|
| 484 |
+
s.sock.onopen=function(){s.alive=true;s.error=null};
|
| 485 |
+
s.sock.onmessage=function(e){s.inbox.push(e.data)};
|
| 486 |
+
s.sock.onerror=function(){s.error='ws_error'};
|
| 487 |
+
s.sock.onclose=function(e){
|
| 488 |
+
s.alive=false;
|
| 489 |
+
if(e.code!==1000&&e.code!==1005)s.error='closed_'+e.code
|
| 490 |
+
};
|
| 491 |
+
},
|
| 492 |
+
send:function(d){
|
| 493 |
+
if(this.sock&&this.sock.readyState===1){
|
| 494 |
+
this.sock.send(typeof d==='string'?d:JSON.stringify(d));
|
| 495 |
+
return true}return false},
|
| 496 |
+
drain:function(){
|
| 497 |
+
if(!this.inbox.length)return null;
|
| 498 |
+
var r=JSON.stringify(this.inbox);this.inbox=[];return r},
|
| 499 |
+
kill:function(){if(this.sock)this.sock.close();this.alive=false}
|
| 500 |
+
};
|
| 501 |
+
return 'ok';
|
| 502 |
+
})();
|
| 503 |
+
"""
|
| 504 |
+
|
| 505 |
+
|
| 506 |
+
class _BrowserTransport:
    """
    Headless-FREE Chrome WebSocket relay via Xvfb virtual display.

    NOTE: We intentionally do NOT use Chrome's --headless flag because
    Cloudflare Playground detects and blocks headless user agents.
    Instead we rely on pyvirtualdisplay / Xvfb to provide a real (but
    invisible) X11 display on servers that have no physical monitor.

    Set VR_DISPLAY=1 before importing to enable this behaviour.
    """

    def __init__(self, debug=False):
        # ChromiumPage handle; created lazily in connect().
        self._page = None
        self._debug = debug
        # Process-wide Xvfb manager (singleton shared by all transports).
        self._vd_mgr = _VirtualDisplayManager.instance()

    def connect(self, url: str, **_) -> bool:
        """Open the Playground in real Chrome and connect the in-page WS.

        Raises RuntimeError when DrissionPage/Xvfb are unavailable,
        ConnectionError when the in-page socket errors or times out.
        """
        if not _HAS_BROWSER:
            raise RuntimeError(
                "DrissionPage not available — cannot use browser fallback"
            )

        # ── Start virtual display (Xvfb) if enabled ──────
        if self._vd_mgr.enabled and not self._vd_mgr.running:
            self._vd_mgr.start()
            if not self._vd_mgr.running:
                raise RuntimeError(
                    "Virtual display (Xvfb) failed to start. "
                    "Install xvfb: apt-get install -y xvfb"
                )

        # Still no display at all → refuse; headless mode is deliberately off.
        if not self._vd_mgr.running:
            raise RuntimeError(
                "No display available and VR_DISPLAY is not set. "
                "Set VR_DISPLAY=1 to use Xvfb virtual display, "
                "or run on a machine with a real display. "
                "Headless Chrome is intentionally disabled."
            )

        opts = ChromiumOptions()
        # Hide the usual automation fingerprints from the page.
        opts.set_argument("--disable-blink-features=AutomationControlled")
        opts.set_argument("--no-sandbox")
        opts.set_argument("--disable-dev-shm-usage")
        opts.set_argument("--disable-gpu")
        opts.set_argument("--disable-extensions")
        opts.set_argument("--disable-plugins")
        opts.set_argument("--disable-infobars")
        opts.set_argument("--window-size=1280,720")
        # ── NO headless flag — Cloudflare blocks headless ──

        self._page = ChromiumPage(addr_or_opts=opts)
        self._page.get(_SITE)
        # Fixed grace period for the page (and any CF challenge) to settle.
        time.sleep(4)

        # Install the JS relay, then open the socket from inside the page.
        self._page.run_js(_BROWSER_JS)
        self._page.run_js(f"window.__cfws.connect('{url}');")

        # Poll until the in-page socket reports alive, an error, or 15s.
        deadline = time.time() + 15
        while time.time() < deadline:
            if self._page.run_js("return window.__cfws.alive;"):
                return True
            err = self._page.run_js("return window.__cfws.error;")
            if err:
                raise ConnectionError(f"Browser WS failed: {err}")
            time.sleep(0.1)

        raise ConnectionError("Browser WS timed out waiting for connection")

    def send(self, data: str) -> bool:
        """Relay *data* through the in-page socket; False on any failure."""
        try:
            return bool(
                self._page.run_js(
                    # json.dumps produces a safe JS string literal for *data*.
                    f"return window.__cfws.send({json.dumps(data)});"
                )
            )
        except Exception:
            return False

    def recv(self) -> list[str]:
        """Drain queued messages from the page; [] when none or unparseable."""
        try:
            raw = self._page.run_js("return window.__cfws.drain();")
        except Exception:
            return []
        if not raw:
            return []
        try:
            batch = json.loads(raw)
            return batch if isinstance(batch, list) else []
        except (json.JSONDecodeError, TypeError):
            return []

    @property
    def alive(self) -> bool:
        # True only while the in-page socket reports itself open.
        try:
            return bool(self._page.run_js("return window.__cfws.alive;"))
        except Exception:
            return False

    def close(self):
        """Kill the in-page socket and quit Chrome (both best-effort)."""
        if self._page:
            try:
                self._page.run_js("if(window.__cfws) window.__cfws.kill();")
            except Exception:
                pass
            try:
                self._page.quit()
            except Exception:
                pass
        self._page = None
|
| 616 |
+
|
| 617 |
+
|
| 618 |
+
# ───────────────────────────────────────────────────────────
# §8 — PROVIDER
# ───────────────────────────────────────────────────────────
|
| 621 |
+
class CloudflareProvider:
|
| 622 |
+
"""
|
| 623 |
+
βοΈ Cloudflare AI Playground β fully modular provider.
|
| 624 |
+
|
| 625 |
+
Virtual Display (required on headless servers):
|
| 626 |
+
Set VR_DISPLAY=1 before importing this module.
|
| 627 |
+
This starts Xvfb so Chrome has a real (invisible) display.
|
| 628 |
+
Headless Chrome is intentionally NOT used β Cloudflare blocks it.
|
| 629 |
+
|
| 630 |
+
$ export VR_DISPLAY=1
|
| 631 |
+
$ python server.py
|
| 632 |
+
|
| 633 |
+
Usage:
|
| 634 |
+
provider = CloudflareProvider()
|
| 635 |
+
for chunk in provider.chat(data="Hello!"):
|
| 636 |
+
print(chunk, end="")
|
| 637 |
+
|
| 638 |
+
# non-streaming:
|
| 639 |
+
response = provider.ask("What is 2+2?")
|
| 640 |
+
"""
|
| 641 |
+
|
| 642 |
+
    def __init__(
        self,
        model: str = "@cf/moonshotai/kimi-k2.5",
        system: str = "You are a helpful assistant.",
        temperature: float = 1.0,
        max_tokens: int | None = None,
        timeout_init: int = 120,
        timeout_idle: int = 30,
        use_cache: bool = True,
        debug: bool = False,
    ):
        """Connect immediately (direct WS first, browser fallback) and sync state.

        max_tokens=None means "use the model's context window" — resolved
        in _boot() once the model catalog is available.
        """
        self.model = _resolve_model(model)
        self.system = system
        self.temperature = temperature
        self.max_tokens = max_tokens
        self.timeout_init = timeout_init    # secs to wait for the first token
        self.timeout_idle = timeout_idle    # secs between tokens mid-stream
        self.use_cache = use_cache
        self.debug = debug

        self.history: list[dict] = []       # CF-format conversation turns
        self.models: list[dict] = []        # raw model catalog
        self._chat_models: list[dict] = []  # "Text Generation" subset
        self.last_response: str = ""
        self.last_reasoning: str = ""

        self._sid: str = ""                 # playground session id
        self._pk: str = ""                  # per-session key
        self._transport = None              # _DirectTransport | _BrowserTransport
        self._mode: str = ""                # "direct" or "browser"
        self._on: bool = False              # True once the handshake completed

        self._boot()
        # Best-effort cleanup if the caller forgets to close().
        atexit.register(self.close)
|
| 676 |
+
|
| 677 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 678 |
+
# Logging
|
| 679 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 680 |
+
def _d(self, *a):
|
| 681 |
+
if self.debug:
|
| 682 |
+
print("[cloudflare]", *a, file=sys.stderr, flush=True)
|
| 683 |
+
|
| 684 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 685 |
+
# Low-level WS
|
| 686 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 687 |
+
def _pull(self) -> list[str]:
|
| 688 |
+
msgs = self._transport.recv()
|
| 689 |
+
if self.debug:
|
| 690 |
+
for m in msgs:
|
| 691 |
+
self._d("β", str(m)[:160])
|
| 692 |
+
return msgs
|
| 693 |
+
|
| 694 |
+
def _push(self, obj):
|
| 695 |
+
raw = json.dumps(obj, ensure_ascii=False)
|
| 696 |
+
self._d("β", raw[:300])
|
| 697 |
+
if not self._transport.send(raw):
|
| 698 |
+
raise RuntimeError("WebSocket send failed")
|
| 699 |
+
|
| 700 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 701 |
+
# Boot β tries direct WS first, then Xvfb browser
|
| 702 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 703 |
+
    def _boot(self):
        """Establish a transport (direct WS, then Xvfb browser fallback),
        handshake with the agent, load the model catalog, and push state."""
        self._sid = _make_sid()
        self._pk = _make_pk()
        url = _make_ws_url(self._sid, self._pk)

        # ── Attempt 1: direct Python WebSocket ──────
        try:
            self._d("Trying direct Python WebSocket...")
            t = _DirectTransport(debug=self.debug)
            t.connect(url)

            # Short grace period: some rejections only surface just after connect.
            time.sleep(0.3)
            if t.alive:
                self._transport = t
                self._mode = "direct"
                self._d("✓ Direct connection — no browser needed!")
            else:
                t.close()
                raise ConnectionError("Direct WS not alive after connect")

        except Exception as e:
            self._d(f"Direct failed: {e}")
            self._d("Falling back to browser transport (Xvfb)...")

            # ── Attempt 2: Xvfb + Chrome relay ──────
            try:
                t = _BrowserTransport(debug=self.debug)
                t.connect(url)
                self._transport = t
                self._mode = "browser"
                self._d("✓ Browser transport connected")
                vd = _VirtualDisplayManager.instance()
                if vd.running:
                    self._d(f"  └─ {vd}")
            except Exception as e2:
                raise ConnectionError(
                    f"All connection methods failed.\n"
                    f"  Direct:  {e}\n"
                    f"  Browser: {e2}\n"
                    f"  Tip: ensure VR_DISPLAY=1 and xvfb is installed."
                ) from e2

        self._on = True

        # ── Handshake ──────────────────────────────
        # Wait (≤10s) for the identity/state frames the agent sends on
        # connect; proceeding with a partial set is tolerated.
        # NOTE(review): `seen < want` is a strict-subset test — any frame
        # type outside `want` (or the "" added on a parse miss) makes
        # `seen` no longer a subset and ends the wait early.  Probably
        # intended as `not want <= seen`; confirm before changing.
        want = {"cf_agent_identity", "cf_agent_state", "cf_agent_mcp_servers"}
        seen = set()
        deadline = time.time() + 10
        while time.time() < deadline and seen < want:
            for raw in self._pull():
                try:
                    seen.add(json.loads(raw).get("type", ""))
                except Exception:
                    pass
            time.sleep(0.05)

        self._d(f"Handshake received: {seen}")

        self._push({"type": "cf_agent_stream_resume_request"})
        time.sleep(0.3)
        self._pull()  # discard the resume reply

        # ── Models + state ─────────────────────────
        self._load_models()
        if self.max_tokens is None:
            # Default the token budget to the active model's context window.
            self.max_tokens = self._ctx_window(self.model)
        self._sync()
|
| 770 |
+
|
| 771 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 772 |
+
# Models
|
| 773 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 774 |
+
    def _load_models(self):
        """Populate self.models / self._chat_models from disk cache or the server."""
        if self.use_cache:
            cached = _Cache.load()
            if cached:
                self.models = cached
                # Only text-generation models are usable for chat.
                self._chat_models = [
                    m for m in self.models
                    if m.get("task", {}).get("name") == "Text Generation"
                ]
                self._d(f"Loaded {len(self._chat_models)} chat models from cache")
                return
        self._fetch_models()
        if self.models and self.use_cache:
            _Cache.save(self.models)
|
| 788 |
+
|
| 789 |
+
    def _fetch_models(self):
        """Request the model catalog via the agent's `getModels` RPC.

        Polls up to 15s for the final frame of *our* request id; on
        timeout leaves self.models untouched and only logs a warning.
        """
        rid = str(uuid.uuid4())
        self._push({"args": [], "id": rid, "method": "getModels", "type": "rpc"})

        deadline = time.time() + 15
        while time.time() < deadline:
            for raw in self._pull():
                try:
                    d = json.loads(raw)
                except Exception:
                    continue
                # Accept only the terminal, successful frame of this RPC.
                if (d.get("type") == "rpc" and d.get("id") == rid
                        and d.get("done") and d.get("success")):
                    self.models = d.get("result", [])
                    self._chat_models = [
                        m for m in self.models
                        if m.get("task", {}).get("name") == "Text Generation"
                    ]
                    self._d(f"Fetched {len(self._chat_models)} chat models")
                    return
            time.sleep(0.05)
        self._d("Warning: model fetch timed out")
|
| 811 |
+
|
| 812 |
+
def _ctx_window(self, model_name: str) -> int:
|
| 813 |
+
for m in self._chat_models:
|
| 814 |
+
if m.get("name") == model_name:
|
| 815 |
+
for p in m.get("properties", []):
|
| 816 |
+
if p.get("property_id") == "context_window":
|
| 817 |
+
try:
|
| 818 |
+
return int(p["value"])
|
| 819 |
+
except (ValueError, KeyError):
|
| 820 |
+
pass
|
| 821 |
+
return 4096
|
| 822 |
+
|
| 823 |
+
def _resolve(self, name: str) -> str:
|
| 824 |
+
if not name:
|
| 825 |
+
return name
|
| 826 |
+
if name.startswith("@cf/") or name.startswith("@hf/"):
|
| 827 |
+
return name
|
| 828 |
+
for m in self._chat_models:
|
| 829 |
+
full = m.get("name", "")
|
| 830 |
+
short = full.rsplit("/", 1)[-1]
|
| 831 |
+
if short == name or full == name:
|
| 832 |
+
return full
|
| 833 |
+
return _SHORT_TO_FULL.get(name, name)
|
| 834 |
+
|
| 835 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 836 |
+
# State Sync
|
| 837 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 838 |
+
    def _sync(self):
        """Push the current model/temperature/system settings to the agent."""
        self._push({
            "type": "cf_agent_state",
            "state": {
                "model": self.model,
                "temperature": self.temperature,
                "stream": True,
                "system": self.system,
                "useExternalProvider": False,
                "externalProvider": "openai",
                "authMethod": "provider-key",
            },
        })
        # Brief pause, then discard the acknowledgement frame(s).
        time.sleep(0.15)
        self._pull()
|
| 853 |
+
|
| 854 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 855 |
+
# Setters
|
| 856 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 857 |
+
def set_model(self, name: str) -> "CloudflareProvider":
|
| 858 |
+
full = self._resolve(name)
|
| 859 |
+
self.model = full
|
| 860 |
+
self.max_tokens = self._ctx_window(full)
|
| 861 |
+
if self._on:
|
| 862 |
+
self._sync()
|
| 863 |
+
return self
|
| 864 |
+
|
| 865 |
+
    def set_system(self, prompt: str) -> "CloudflareProvider":
        """Replace the system prompt (fluent); re-sync if connected."""
        self.system = prompt
        if self._on:
            self._sync()
        return self

    def set_temperature(self, t: float) -> "CloudflareProvider":
        """Set sampling temperature, clamped to [0.0, 2.0] (fluent)."""
        self.temperature = max(0.0, min(2.0, t))
        if self._on:
            self._sync()
        return self

    def set_max_tokens(self, n: int) -> "CloudflareProvider":
        """Set the response token budget (fluent; not pushed to the server)."""
        self.max_tokens = n
        return self

    def clear_history(self):
        """Drop all conversation turns (connection and settings are kept)."""
        self.history.clear()

    def get_history(self) -> list[dict]:
        """Return the conversation in OpenAI message format (incl. system)."""
        return _Conv.to_openai(self.history, self.system)
|
| 886 |
+
|
| 887 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 888 |
+
# Model listing
|
| 889 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 890 |
+
def list_models(self) -> list[dict]:
|
| 891 |
+
return [{
|
| 892 |
+
"name": m.get("name", ""),
|
| 893 |
+
"short": m.get("name", "").rsplit("/", 1)[-1],
|
| 894 |
+
"context": self._ctx_window(m.get("name", "")),
|
| 895 |
+
"active": m.get("name") == self.model,
|
| 896 |
+
} for m in self._chat_models]
|
| 897 |
+
|
| 898 |
+
    def refresh_models(self):
        """Invalidate the disk cache and re-fetch the catalog from the server."""
        _Cache.clear()
        self._fetch_models()
        if self.models and self.use_cache:
            _Cache.save(self.models)
|
| 903 |
+
|
| 904 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 905 |
+
# β
CHAT (streaming generator)
|
| 906 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 907 |
+
def chat(
|
| 908 |
+
self,
|
| 909 |
+
data: str = None,
|
| 910 |
+
messages: list[dict] = None,
|
| 911 |
+
model: str = None,
|
| 912 |
+
temperature: float = None,
|
| 913 |
+
system: str = None,
|
| 914 |
+
max_tokens: int = None,
|
| 915 |
+
) -> Generator[str, None, None]:
|
| 916 |
+
if not self._on:
|
| 917 |
+
raise RuntimeError("Not connected β call new_session()")
|
| 918 |
+
if not messages and not data:
|
| 919 |
+
raise ValueError("Provide 'messages' or 'data'")
|
| 920 |
+
|
| 921 |
+
changed = False
|
| 922 |
+
|
| 923 |
+
if model:
|
| 924 |
+
full = self._resolve(model)
|
| 925 |
+
if full != self.model:
|
| 926 |
+
self.model = full
|
| 927 |
+
self.max_tokens = self._ctx_window(full)
|
| 928 |
+
changed = True
|
| 929 |
+
|
| 930 |
+
if temperature is not None and temperature != self.temperature:
|
| 931 |
+
self.temperature = max(0.0, min(2.0, temperature))
|
| 932 |
+
changed = True
|
| 933 |
+
|
| 934 |
+
if system and system != self.system:
|
| 935 |
+
self.system = system
|
| 936 |
+
changed = True
|
| 937 |
+
|
| 938 |
+
if max_tokens is not None:
|
| 939 |
+
self.max_tokens = max_tokens
|
| 940 |
+
|
| 941 |
+
if messages:
|
| 942 |
+
sys_p, cf_msgs = _Conv.to_cf(messages)
|
| 943 |
+
if sys_p and sys_p != self.system:
|
| 944 |
+
self.system = sys_p
|
| 945 |
+
changed = True
|
| 946 |
+
self.history = cf_msgs
|
| 947 |
+
else:
|
| 948 |
+
self.history.append(_Build.user(data))
|
| 949 |
+
|
| 950 |
+
if changed:
|
| 951 |
+
self._sync()
|
| 952 |
+
|
| 953 |
+
self._push(_Build.req(self.history))
|
| 954 |
+
|
| 955 |
+
full_text = ""
|
| 956 |
+
reasoning = ""
|
| 957 |
+
error = None
|
| 958 |
+
got_first = False
|
| 959 |
+
done = False
|
| 960 |
+
last_data = time.time()
|
| 961 |
+
reasoning_open = False
|
| 962 |
+
|
| 963 |
+
while not done:
|
| 964 |
+
if not self._transport.alive:
|
| 965 |
+
self._d("Transport died mid-stream")
|
| 966 |
+
if not full_text:
|
| 967 |
+
yield "[Connection lost]\n"
|
| 968 |
+
break
|
| 969 |
+
|
| 970 |
+
msgs = self._pull()
|
| 971 |
+
|
| 972 |
+
if not msgs:
|
| 973 |
+
elapsed = time.time() - last_data
|
| 974 |
+
limit = self.timeout_idle if got_first else self.timeout_init
|
| 975 |
+
if elapsed > limit:
|
| 976 |
+
self._d(f"Timeout after {elapsed:.1f}s")
|
| 977 |
+
if not full_text:
|
| 978 |
+
yield "[Timeout β no response received]\n"
|
| 979 |
+
break
|
| 980 |
+
time.sleep(0.015 if got_first else 0.04)
|
| 981 |
+
continue
|
| 982 |
+
|
| 983 |
+
last_data = time.time()
|
| 984 |
+
|
| 985 |
+
for raw in msgs:
|
| 986 |
+
try:
|
| 987 |
+
f = json.loads(raw)
|
| 988 |
+
except Exception:
|
| 989 |
+
continue
|
| 990 |
+
|
| 991 |
+
ftype = f.get("type", "")
|
| 992 |
+
if ftype != "cf_agent_use_chat_response":
|
| 993 |
+
continue
|
| 994 |
+
|
| 995 |
+
body_str = f.get("body", "")
|
| 996 |
+
if body_str:
|
| 997 |
+
try:
|
| 998 |
+
b = json.loads(body_str)
|
| 999 |
+
except Exception:
|
| 1000 |
+
continue
|
| 1001 |
+
|
| 1002 |
+
bt = b.get("type", "")
|
| 1003 |
+
|
| 1004 |
+
if bt == "reasoning-start":
|
| 1005 |
+
reasoning_open = True
|
| 1006 |
+
got_first = True
|
| 1007 |
+
yield "<think>\n"
|
| 1008 |
+
|
| 1009 |
+
elif bt == "reasoning-delta":
|
| 1010 |
+
delta = b.get("delta", "")
|
| 1011 |
+
if delta:
|
| 1012 |
+
reasoning += delta
|
| 1013 |
+
got_first = True
|
| 1014 |
+
yield delta
|
| 1015 |
+
|
| 1016 |
+
elif bt == "reasoning-end":
|
| 1017 |
+
if reasoning_open:
|
| 1018 |
+
reasoning_open = False
|
| 1019 |
+
yield "\n</think>\n\n"
|
| 1020 |
+
|
| 1021 |
+
elif bt == "text-delta":
|
| 1022 |
+
delta = b.get("delta", "")
|
| 1023 |
+
if reasoning_open:
|
| 1024 |
+
reasoning_open = False
|
| 1025 |
+
yield "\n</think>\n\n"
|
| 1026 |
+
if delta:
|
| 1027 |
+
full_text += delta
|
| 1028 |
+
got_first = True
|
| 1029 |
+
yield delta
|
| 1030 |
+
|
| 1031 |
+
elif bt == "error":
|
| 1032 |
+
error = b.get("message", str(b))
|
| 1033 |
+
|
| 1034 |
+
if f.get("done", False):
|
| 1035 |
+
done = True
|
| 1036 |
+
break
|
| 1037 |
+
|
| 1038 |
+
if reasoning_open:
|
| 1039 |
+
yield "\n</think>\n\n"
|
| 1040 |
+
|
| 1041 |
+
if error:
|
| 1042 |
+
self._d("Server error:", error)
|
| 1043 |
+
if not full_text:
|
| 1044 |
+
yield f"\n[Error: {error}]\n"
|
| 1045 |
+
|
| 1046 |
+
if full_text:
|
| 1047 |
+
self.history.append(_Build.asst(full_text, reasoning))
|
| 1048 |
+
|
| 1049 |
+
self.last_response = full_text
|
| 1050 |
+
self.last_reasoning = reasoning
|
| 1051 |
+
|
| 1052 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 1053 |
+
# ask() β non-streaming convenience
|
| 1054 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 1055 |
+
def ask(self, prompt: str, **kwargs) -> str:
|
| 1056 |
+
return "".join(self.chat(data=prompt, **kwargs))
|
| 1057 |
+
|
| 1058 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 1059 |
+
# Session management
|
| 1060 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 1061 |
+
    def new_session(self):
        """Drop the transport and history, then reconnect from scratch."""
        self._close_transport()
        self.history.clear()
        self._boot()
|
| 1065 |
+
|
| 1066 |
+
def _close_transport(self):
|
| 1067 |
+
if self._transport:
|
| 1068 |
+
try:
|
| 1069 |
+
self._transport.close()
|
| 1070 |
+
except Exception:
|
| 1071 |
+
pass
|
| 1072 |
+
self._transport = None
|
| 1073 |
+
self._on = False
|
| 1074 |
+
|
| 1075 |
+
def close(self):
|
| 1076 |
+
self._close_transport()
|
| 1077 |
+
if self._mode == "browser":
|
| 1078 |
+
vd = _VirtualDisplayManager.instance()
|
| 1079 |
+
vd.stop()
|
| 1080 |
+
self._d("Closed.")
|
| 1081 |
+
|
| 1082 |
+
    def __enter__(self):
        # Context-manager support: `with CloudflareProvider() as p: ...`
        return self

    def __exit__(self, *_):
        self.close()

    def __del__(self):
        # Destructor cleanup must never raise.
        try:
            self.close()
        except Exception:
            pass
|
| 1093 |
+
|
| 1094 |
+
def __repr__(self):
|
| 1095 |
+
s = "β
" if self._on else "β"
|
| 1096 |
+
vd = _VirtualDisplayManager.instance()
|
| 1097 |
+
vd_info = f" vdisplay={vd}" if vd.enabled else ""
|
| 1098 |
+
return (
|
| 1099 |
+
f"CloudflareProvider({s} mode={self._mode!r} "
|
| 1100 |
+
f"model={self.model!r} max_tokens={self.max_tokens}{vd_info})"
|
| 1101 |
+
)
|
| 1102 |
+
|
| 1103 |
+
|
| 1104 |
+
# ───────────────────────────────────────────────────────────
# §9 — PROCESS-EXIT CLEANUP
# ───────────────────────────────────────────────────────────
def _cleanup_virtual_display():
    """atexit hook: stop the shared Xvfb display, swallowing all errors."""
    try:
        vd = _VirtualDisplayManager.instance()
        vd.stop()
    except Exception:
        pass

# Registered module-wide so Xvfb is stopped even if no provider closes.
atexit.register(_cleanup_virtual_display)
|
requirements.txt
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# βββ Web framework ββββββββββββββββββββββββββββββ
|
| 2 |
+
fastapi>=0.111.0
|
| 3 |
+
uvicorn[standard]>=0.29.0
|
| 4 |
+
pydantic>=2.0.0
|
| 5 |
+
|
| 6 |
+
# βββ Cloudflare provider deps βββββββββββββββββββ
|
| 7 |
+
websocket-client>=1.8.0
|
| 8 |
+
DrissionPage>=4.1.0
|
| 9 |
+
|
| 10 |
+
# βββ Virtual display (Xvfb wrapper) βββββββββββββ
|
| 11 |
+
PyVirtualDisplay>=3.0
|
server.py
ADDED
|
@@ -0,0 +1,634 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 3 |
+
β server.py β Cloudflare AI REST API β
|
| 4 |
+
β β
|
| 5 |
+
β OpenAI-compatible endpoints: β
|
| 6 |
+
β POST /v1/chat/completions (streaming + non-streaming) β
|
| 7 |
+
β GET /v1/models β
|
| 8 |
+
β GET /health β
|
| 9 |
+
β GET / β
|
| 10 |
+
β β
|
| 11 |
+
β Architecture: β
|
| 12 |
+
β β’ ProviderPool β N pre-warmed WS connections β
|
| 13 |
+
β β’ acquire() β queue-based fair checkout, auto-heal β
|
| 14 |
+
β β’ HealthMonitor β periodic background probe + heal β
|
| 15 |
+
β β’ SSE streaming β threadβasyncio bridge via Queue β
|
| 16 |
+
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 17 |
+
"""
|
| 18 |
+
|
| 19 |
+
import asyncio
|
| 20 |
+
import json
|
| 21 |
+
import logging
|
| 22 |
+
import os
|
| 23 |
+
import sys
|
| 24 |
+
import threading
|
| 25 |
+
import time
|
| 26 |
+
import uuid
|
| 27 |
+
from contextlib import asynccontextmanager
|
| 28 |
+
from typing import AsyncGenerator, List, Optional
|
| 29 |
+
|
| 30 |
+
import uvicorn
|
| 31 |
+
from fastapi import FastAPI, HTTPException, Request
|
| 32 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 33 |
+
from fastapi.responses import JSONResponse, StreamingResponse
|
| 34 |
+
from pydantic import BaseModel, Field
|
| 35 |
+
|
| 36 |
+
# βββ Import provider ββββββββββββββββββββββββββββββββββββββββ
|
| 37 |
+
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
| 38 |
+
from cloudflare_provider import CloudflareProvider
|
| 39 |
+
|
| 40 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 41 |
+
# LOGGING
|
| 42 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 43 |
+
logging.basicConfig(
|
| 44 |
+
level=logging.INFO,
|
| 45 |
+
format="%(asctime)s %(levelname)-8s %(message)s",
|
| 46 |
+
stream=sys.stdout,
|
| 47 |
+
datefmt="%H:%M:%S",
|
| 48 |
+
)
|
| 49 |
+
log = logging.getLogger("cf-api")
|
| 50 |
+
|
| 51 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 52 |
+
# CONFIG (all tunable via environment variables)
|
| 53 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 54 |
+
POOL_SIZE = int(os.getenv("POOL_SIZE", "2"))
|
| 55 |
+
PORT = int(os.getenv("PORT", "7860"))
|
| 56 |
+
HOST = os.getenv("HOST", "0.0.0.0")
|
| 57 |
+
HEALTH_INTERVAL = int(os.getenv("HEALTH_INTERVAL", "60")) # seconds
|
| 58 |
+
ACQUIRE_TIMEOUT = int(os.getenv("ACQUIRE_TIMEOUT", "60")) # wait for free slot
|
| 59 |
+
STREAM_TIMEOUT = int(os.getenv("STREAM_TIMEOUT", "120")) # total stream timeout
|
| 60 |
+
DEFAULT_MODEL = os.getenv("DEFAULT_MODEL", "@cf/moonshotai/kimi-k2.5")
|
| 61 |
+
DEFAULT_SYSTEM = os.getenv("DEFAULT_SYSTEM", "You are a helpful assistant.")
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 65 |
+
# PYDANTIC SCHEMAS
|
| 66 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 67 |
+
class Message(BaseModel):
    # One OpenAI-style chat turn; role values are not validated here.
    role: str
    content: str

class ChatRequest(BaseModel):
    # Request body of POST /v1/chat/completions (OpenAI-compatible subset).
    model: str = DEFAULT_MODEL
    messages: List[Message]
    temperature: float = Field(default=1.0, ge=0.0, le=2.0)
    max_tokens: Optional[int] = None
    stream: bool = True
    system: Optional[str] = None  # extra system-prompt override
|
| 78 |
+
|
| 79 |
+
class CompletionChoice(BaseModel):
    # One generated alternative in an OpenAI-style response.
    index: int
    message: dict        # assistant message payload (shape set by the caller)
    finish_reason: str

class CompletionResponse(BaseModel):
    # Non-streaming response envelope (OpenAI chat-completion shape).
    id: str
    object: str
    created: int         # unix timestamp (seconds)
    model: str
    choices: List[CompletionChoice]
    usage: dict          # token accounting dict, filled in by the caller
|
| 91 |
+
|
| 92 |
+
|
| 93 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 94 |
+
# MANAGED PROVIDER (pool slot)
|
| 95 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 96 |
+
class ManagedProvider:
    """A single pool slot wrapping one CloudflareProvider instance."""

    def __init__(self, slot_id: int):
        self.slot_id = slot_id
        self.provider: Optional[CloudflareProvider] = None  # set when spawned
        self.busy = False            # checked out by a request?
        self.born_at = 0.0           # creation timestamp (set by the pool)
        self.error_count = 0
        self.request_count = 0

    # ── Health ──────────────────────────────────────
    def is_healthy(self) -> bool:
        """True when the wrapped provider is connected with a live transport."""
        prov = self.provider
        if prov is None:
            return False
        try:
            transport = prov._transport
            return bool(prov._on and transport is not None and transport.alive)
        except Exception:
            # Any probe failure counts as unhealthy.
            return False

    # ── Teardown ────────────────────────────────────
    def close(self):
        """Release the wrapped provider, ignoring teardown errors."""
        prov, self.provider = self.provider, None
        if prov is not None:
            try:
                prov.close()
            except Exception:
                pass

    def __repr__(self):
        if self.busy:
            state = "busy"
        elif self.is_healthy():
            state = "ok"
        else:
            state = "dead"
        mode = self.provider._mode if self.provider else "none"
        return (
            f"<Slot#{self.slot_id} {state} mode={mode!r} "
            f"reqs={self.request_count} errs={self.error_count}>"
        )
|
| 137 |
+
|
| 138 |
+
|
| 139 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 140 |
+
# PROVIDER POOL
|
| 141 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 142 |
+
class ProviderPool:
|
| 143 |
+
"""
|
| 144 |
+
Pre-warmed pool of CloudflareProvider connections.
|
| 145 |
+
|
| 146 |
+
β’ initialize() β create all slots at startup
|
| 147 |
+
β’ acquire() β async context manager; blocks until a free slot
|
| 148 |
+
β’ health_monitor β background coroutine; heals broken idle slots
|
| 149 |
+
β’ shutdown() β clean teardown
|
| 150 |
+
"""
|
| 151 |
+
|
| 152 |
+
    def __init__(self, size: int = 2):
        self.size = size
        self._slots: List[ManagedProvider] = []        # every slot ever spawned
        self._queue: asyncio.Queue = None              # set in initialize()
        self._loop: asyncio.AbstractEventLoop = None   # set in initialize()
        self._lock = asyncio.Lock()
|
| 158 |
+
|
| 159 |
+
# βββ Startup ββββββββββββββββββββββββββββββββββ
|
| 160 |
+
async def initialize(self):
|
| 161 |
+
self._loop = asyncio.get_event_loop()
|
| 162 |
+
self._queue = asyncio.Queue(maxsize=self.size)
|
| 163 |
+
|
| 164 |
+
log.info(f"π Initializing provider pool (slots={self.size})")
|
| 165 |
+
|
| 166 |
+
results = await asyncio.gather(
|
| 167 |
+
*[self._spawn_slot(i) for i in range(self.size)],
|
| 168 |
+
return_exceptions=True,
|
| 169 |
+
)
|
| 170 |
+
|
| 171 |
+
ok = sum(1 for r in results if not isinstance(r, Exception))
|
| 172 |
+
log.info(f" Pool ready β {ok}/{self.size} slots healthy")
|
| 173 |
+
|
| 174 |
+
if ok == 0:
|
| 175 |
+
raise RuntimeError(
|
| 176 |
+
"No provider slots could connect. Check network / Xvfb setup."
|
| 177 |
+
)
|
| 178 |
+
|
| 179 |
+
async def _spawn_slot(self, slot_id: int) -> ManagedProvider:
|
| 180 |
+
managed = ManagedProvider(slot_id)
|
| 181 |
+
|
| 182 |
+
def _create() -> CloudflareProvider:
|
| 183 |
+
log.info(f" [S{slot_id}] Connecting...")
|
| 184 |
+
return CloudflareProvider(
|
| 185 |
+
model = DEFAULT_MODEL,
|
| 186 |
+
system = DEFAULT_SYSTEM,
|
| 187 |
+
debug = False,
|
| 188 |
+
use_cache = True,
|
| 189 |
+
)
|
| 190 |
+
|
| 191 |
+
managed.provider = await asyncio.wait_for(
|
| 192 |
+
self._loop.run_in_executor(None, _create),
|
| 193 |
+
timeout=180,
|
| 194 |
+
)
|
| 195 |
+
managed.born_at = time.time()
|
| 196 |
+
|
| 197 |
+
self._slots.append(managed)
|
| 198 |
+
await self._queue.put(managed)
|
| 199 |
+
|
| 200 |
+
mode = managed.provider._mode
|
| 201 |
+
log.info(f" [S{slot_id}] β Ready mode={mode!r}")
|
| 202 |
+
return managed
|
| 203 |
+
|
| 204 |
+
# βββ Acquire ββββββββββββββββββββββββββββββββββ
|
| 205 |
+
@asynccontextmanager
|
| 206 |
+
async def acquire(self):
|
| 207 |
+
"""Checkout a provider, yield it, return on exit (healing if needed)."""
|
| 208 |
+
managed: ManagedProvider = await asyncio.wait_for(
|
| 209 |
+
self._queue.get(),
|
| 210 |
+
timeout=ACQUIRE_TIMEOUT,
|
| 211 |
+
)
|
| 212 |
+
managed.busy = True
|
| 213 |
+
ok = True
|
| 214 |
+
|
| 215 |
+
try:
|
| 216 |
+
# Heal before handing out
|
| 217 |
+
if not managed.is_healthy():
|
| 218 |
+
log.warning(f"[S{managed.slot_id}] Unhealthy β healing before use")
|
| 219 |
+
await self._heal(managed)
|
| 220 |
+
|
| 221 |
+
managed.request_count += 1
|
| 222 |
+
yield managed.provider
|
| 223 |
+
|
| 224 |
+
except Exception:
|
| 225 |
+
managed.error_count += 1
|
| 226 |
+
ok = False
|
| 227 |
+
raise
|
| 228 |
+
|
| 229 |
+
finally:
|
| 230 |
+
managed.busy = False
|
| 231 |
+
# After use: return if healthy, else heal in background
|
| 232 |
+
if managed.is_healthy():
|
| 233 |
+
await self._queue.put(managed)
|
| 234 |
+
else:
|
| 235 |
+
log.warning(f"[S{managed.slot_id}] Unhealthy after use β background heal")
|
| 236 |
+
asyncio.create_task(self._heal_then_return(managed))
|
| 237 |
+
|
| 238 |
+
# βββ Healing ββββββββββββββββββββββββββββββββββ
|
| 239 |
+
async def _heal(self, managed: ManagedProvider):
|
| 240 |
+
sid = managed.slot_id
|
| 241 |
+
|
| 242 |
+
def _recreate() -> CloudflareProvider:
|
| 243 |
+
managed.close()
|
| 244 |
+
return CloudflareProvider(
|
| 245 |
+
model = DEFAULT_MODEL,
|
| 246 |
+
system = DEFAULT_SYSTEM,
|
| 247 |
+
debug = False,
|
| 248 |
+
use_cache = True,
|
| 249 |
+
)
|
| 250 |
+
|
| 251 |
+
managed.provider = await asyncio.wait_for(
|
| 252 |
+
self._loop.run_in_executor(None, _recreate),
|
| 253 |
+
timeout=180,
|
| 254 |
+
)
|
| 255 |
+
managed.born_at = time.time()
|
| 256 |
+
managed.error_count = 0
|
| 257 |
+
log.info(f"[S{sid}] β Healed mode={managed.provider._mode!r}")
|
| 258 |
+
|
| 259 |
+
async def _heal_then_return(self, managed: ManagedProvider):
|
| 260 |
+
try:
|
| 261 |
+
await self._heal(managed)
|
| 262 |
+
except Exception as e:
|
| 263 |
+
log.error(f"[S{managed.slot_id}] Heal failed: {e}")
|
| 264 |
+
# Try a brand-new slot as last resort
|
| 265 |
+
try:
|
| 266 |
+
managed.close()
|
| 267 |
+
managed.provider = await asyncio.wait_for(
|
| 268 |
+
self._loop.run_in_executor(
|
| 269 |
+
None,
|
| 270 |
+
lambda: CloudflareProvider(
|
| 271 |
+
model=DEFAULT_MODEL, system=DEFAULT_SYSTEM,
|
| 272 |
+
debug=False, use_cache=True,
|
| 273 |
+
),
|
| 274 |
+
),
|
| 275 |
+
timeout=180,
|
| 276 |
+
)
|
| 277 |
+
managed.born_at = time.time()
|
| 278 |
+
managed.error_count = 0
|
| 279 |
+
log.info(f"[S{managed.slot_id}] β Cold-boot recovery succeeded")
|
| 280 |
+
except Exception as e2:
|
| 281 |
+
log.error(f"[S{managed.slot_id}] Cold-boot also failed: {e2}")
|
| 282 |
+
|
| 283 |
+
await self._queue.put(managed)
|
| 284 |
+
|
| 285 |
+
# βββ Health monitor βββββββββββββββββββββββββββ
|
| 286 |
+
async def health_monitor(self):
|
| 287 |
+
"""Periodic background coroutine β checks and heals idle slots."""
|
| 288 |
+
while True:
|
| 289 |
+
await asyncio.sleep(HEALTH_INTERVAL)
|
| 290 |
+
healthy = sum(1 for m in self._slots if m.is_healthy())
|
| 291 |
+
busy = sum(1 for m in self._slots if m.busy)
|
| 292 |
+
log.info(
|
| 293 |
+
f"β₯ Health check β {healthy}/{self.size} healthy, "
|
| 294 |
+
f"{busy} busy, queue={self._queue.qsize()}"
|
| 295 |
+
)
|
| 296 |
+
|
| 297 |
+
for managed in list(self._slots):
|
| 298 |
+
if not managed.busy and not managed.is_healthy():
|
| 299 |
+
log.warning(f"[S{managed.slot_id}] Idle but unhealthy β healing")
|
| 300 |
+
# Pull from queue if it's still there, otherwise skip
|
| 301 |
+
asyncio.create_task(self._heal_then_return(managed))
|
| 302 |
+
|
| 303 |
+
# βββ Status βββββββββββββββββββββββββββββββββββ
|
| 304 |
+
@property
|
| 305 |
+
def status(self) -> dict:
|
| 306 |
+
return {
|
| 307 |
+
"pool_size": self.size,
|
| 308 |
+
"queue_free": self._queue.qsize() if self._queue else 0,
|
| 309 |
+
"slots": [
|
| 310 |
+
{
|
| 311 |
+
"id": m.slot_id,
|
| 312 |
+
"healthy": m.is_healthy(),
|
| 313 |
+
"busy": m.busy,
|
| 314 |
+
"mode": m.provider._mode if m.provider else "none",
|
| 315 |
+
"errors": m.error_count,
|
| 316 |
+
"requests": m.request_count,
|
| 317 |
+
"age_s": round(time.time() - m.born_at, 1) if m.born_at else 0,
|
| 318 |
+
}
|
| 319 |
+
for m in self._slots
|
| 320 |
+
],
|
| 321 |
+
}
|
| 322 |
+
|
| 323 |
+
# βββ Shutdown βββββββββββββββββββββββββββββββββ
|
| 324 |
+
async def shutdown(self):
|
| 325 |
+
log.info("Shutting down provider pool...")
|
| 326 |
+
for m in self._slots:
|
| 327 |
+
m.close()
|
| 328 |
+
log.info("Pool shut down.")
|
| 329 |
+
|
| 330 |
+
|
| 331 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
# GLOBAL POOL REFERENCE
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
# Populated by lifespan() at startup; None until then, so every endpoint
# must check `pool is None` before use.
# NOTE(review): annotation says ProviderPool but the initial value is None —
# effectively Optional[ProviderPool].
pool: ProviderPool = None
|
| 335 |
+
|
| 336 |
+
|
| 337 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
# LIFESPAN (startup / shutdown)
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
@asynccontextmanager
async def lifespan(app: FastAPI):
    """FastAPI lifespan hook.

    Startup: build the global provider pool and launch the health monitor.
    Shutdown: cancel the monitor and tear the pool down — wrapped in
    try/finally so cleanup runs even if the application body raises.
    """
    global pool
    pool = ProviderPool(size=POOL_SIZE)
    await pool.initialize()

    monitor = asyncio.create_task(pool.health_monitor())
    # Lazy %-style args; original f-string literal was corrupted (contained
    # a raw line break from a mangled emoji).
    log.info("Server ready on %s:%s", HOST, PORT)

    try:
        yield
    finally:
        monitor.cancel()
        try:
            await monitor
        except asyncio.CancelledError:
            pass  # expected: we cancelled it ourselves
        await pool.shutdown()
|
| 357 |
+
|
| 358 |
+
|
| 359 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
# FASTAPI APP
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
# Single application object; pool lifecycle is driven by `lifespan` above.
app = FastAPI(
    title = "Cloudflare AI API",
    description = "OpenAI-compatible streaming API via Cloudflare AI Playground",
    version = "1.0.0",
    lifespan = lifespan,
    docs_url = "/docs",
    redoc_url = "/redoc",
)

# Fully open CORS: any origin/method/header. Fine for a public demo API;
# tighten allow_origins if credentials are ever introduced.
app.add_middleware(
    CORSMiddleware,
    allow_origins = ["*"],
    allow_methods = ["*"],
    allow_headers = ["*"],
)
|
| 377 |
+
|
| 378 |
+
|
| 379 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 380 |
+
# SSE STREAMING HELPERS
|
| 381 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 382 |
+
def _sse_chunk(content: str, model: str, chunk_id: str) -> str:
|
| 383 |
+
"""Format one SSE data line in OpenAI chunk format."""
|
| 384 |
+
payload = {
|
| 385 |
+
"id": chunk_id,
|
| 386 |
+
"object": "chat.completion.chunk",
|
| 387 |
+
"created": int(time.time()),
|
| 388 |
+
"model": model,
|
| 389 |
+
"choices": [{
|
| 390 |
+
"index": 0,
|
| 391 |
+
"delta": {"content": content},
|
| 392 |
+
"finish_reason": None,
|
| 393 |
+
}],
|
| 394 |
+
}
|
| 395 |
+
return f"data: {json.dumps(payload, ensure_ascii=False)}\n\n"
|
| 396 |
+
|
| 397 |
+
|
| 398 |
+
def _sse_done(model: str, chunk_id: str) -> str:
|
| 399 |
+
"""Final SSE chunk with finish_reason=stop."""
|
| 400 |
+
payload = {
|
| 401 |
+
"id": chunk_id,
|
| 402 |
+
"object": "chat.completion.chunk",
|
| 403 |
+
"created": int(time.time()),
|
| 404 |
+
"model": model,
|
| 405 |
+
"choices": [{
|
| 406 |
+
"index": 0,
|
| 407 |
+
"delta": {},
|
| 408 |
+
"finish_reason": "stop",
|
| 409 |
+
}],
|
| 410 |
+
}
|
| 411 |
+
return f"data: {json.dumps(payload)}\n\ndata: [DONE]\n\n"
|
| 412 |
+
|
| 413 |
+
|
| 414 |
+
def _sse_error(msg: str) -> str:
|
| 415 |
+
return f"data: {{\"error\": {json.dumps(msg)}}}\n\ndata: [DONE]\n\n"
|
| 416 |
+
|
| 417 |
+
|
| 418 |
+
async def _stream_generator(
    provider: CloudflareProvider,
    req: ChatRequest,
) -> AsyncGenerator[str, None]:
    """
    Bridge between the synchronous provider.chat() generator and
    FastAPI's async StreamingResponse.

    Strategy:
      1. Spin up a background thread that runs provider.chat() and
         pushes each chunk into an asyncio.Queue.
      2. Yield SSE-formatted chunks from the queue in the async loop.

    Queue protocol: str = text chunk, Exception = worker error, None = EOF
    sentinel (always pushed by the worker's finally block).
    """
    # NOTE(review): get_event_loop() inside a coroutine is deprecated since
    # 3.10; get_running_loop() is the modern equivalent — confirm target
    # Python before changing.
    loop = asyncio.get_event_loop()
    q: asyncio.Queue = asyncio.Queue(maxsize=256)
    chunk_id = f"chatcmpl-{uuid.uuid4().hex[:20]}"
    # Set by the consumer to ask the worker thread to stop early.
    cancel = threading.Event()

    # Build kwargs for provider
    messages = [{"role": m.role, "content": m.content} for m in req.messages]
    kwargs: dict = {
        "messages": messages,
        "temperature": req.temperature,
    }
    # Falsy values (None, 0, "") are treated as "unset" and omitted.
    if req.model:
        kwargs["model"] = req.model
    if req.max_tokens:
        kwargs["max_tokens"] = req.max_tokens
    if req.system:
        kwargs["system"] = req.system

    # ββ Worker thread ββββββββββββββββββββββββββββ
    def _worker():
        try:
            for chunk in provider.chat(**kwargs):
                if cancel.is_set():
                    break
                fut = asyncio.run_coroutine_threadsafe(q.put(chunk), loop)
                fut.result(timeout=10)  # backpressure: block thread if queue full
        except Exception as exc:
            # Forward the failure to the consumer as an Exception item.
            err = RuntimeError(f"Stream error: {exc}")
            asyncio.run_coroutine_threadsafe(q.put(err), loop).result(timeout=5)
        finally:
            # EOF sentinel — consumer exits its loop when it sees None.
            # NOTE(review): if this put() times out (queue full, consumer
            # gone), the sentinel is lost and the consumer relies on its own
            # STREAM_TIMEOUT to unblock.
            asyncio.run_coroutine_threadsafe(q.put(None), loop).result(timeout=5)

    t = threading.Thread(target=_worker, daemon=True)
    t.start()

    # ββ Async consumer ββββββββββββββββββββββββββββ
    try:
        while True:
            item = await asyncio.wait_for(q.get(), timeout=STREAM_TIMEOUT)

            if item is None:  # sentinel: worker finished cleanly
                yield _sse_done(req.model, chunk_id)
                break

            if isinstance(item, Exception):  # error forwarded from worker
                yield _sse_error(str(item))
                break

            if item:  # normal non-empty text chunk (empty strings dropped)
                yield _sse_chunk(item, req.model, chunk_id)

    except asyncio.TimeoutError:
        # No item arrived within STREAM_TIMEOUT — tell worker to stop.
        cancel.set()
        yield _sse_error("Stream timed out β no data received")

    finally:
        # Always signal the worker and give it a bounded grace period.
        cancel.set()
        t.join(timeout=5)
|
| 489 |
+
|
| 490 |
+
|
| 491 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
# ENDPOINTS
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ

@app.get("/", tags=["Info"])
async def root():
    """Landing payload: service identity plus a map of available routes."""
    routes = {
        "chat": "POST /v1/chat/completions",
        "models": "GET /v1/models",
        "health": "GET /health",
        "docs": "GET /docs",
    }
    return {
        "service": "Cloudflare AI API",
        "version": "1.0.0",
        "status": "running",
        "endpoints": routes,
    }
|
| 508 |
+
|
| 509 |
+
|
| 510 |
+
@app.get("/health", tags=["Info"])
async def health():
    """Liveness probe.

    Returns 200 with status "ok" while at least one slot is healthy,
    206 with status "degraded" otherwise; 503 before the pool exists.
    """
    if pool is None:
        raise HTTPException(503, detail="Pool not yet initialized")

    healthy_count = sum(1 for slot in pool._slots if slot.is_healthy())
    overall = "ok" if healthy_count > 0 else "degraded"
    code = 200 if overall == "ok" else 206

    return JSONResponse(
        content={"status": overall, "pool": pool.status},
        status_code=code,
    )
|
| 520 |
+
|
| 521 |
+
|
| 522 |
+
@app.get("/v1/models", tags=["Models"])
async def list_models():
    """OpenAI-compatible model listing, fetched via a pooled provider."""
    if pool is None:
        raise HTTPException(503, detail="Pool not initialized")

    # provider.list_models is synchronous — run it off the event loop.
    async with pool.acquire() as provider:
        models = await asyncio.get_event_loop().run_in_executor(
            None, provider.list_models
        )

    entries = []
    for model_info in models:
        entries.append({
            "id": model_info["name"],
            "object": "model",
            "created": 0,
            "owned_by": "cloudflare",
            "context_window": model_info.get("context", 4096),
        })

    return {"object": "list", "data": entries}
|
| 545 |
+
|
| 546 |
+
|
| 547 |
+
@app.post("/v1/chat/completions", tags=["Chat"])
async def chat_completions(req: ChatRequest, request: Request):
    """OpenAI-compatible chat completion endpoint.

    Streaming (`req.stream`): returns an SSE StreamingResponse driven by
    `_stream_generator`, aborting early if the client disconnects.
    Non-streaming: collects all chunks in an executor thread and returns a
    single chat.completion object (token usage is not tracked — all zeros).

    Raises:
        HTTPException 503: pool not yet initialized.
        HTTPException 400: empty `messages` list.
    """
    if pool is None:
        raise HTTPException(503, detail="Pool not initialized")

    if not req.messages:
        raise HTTPException(400, detail="`messages` must not be empty")

    # ββ Streaming ββββββββββββββββββββββββββββββββββββββββββ
    if req.stream:
        async def _gen():
            async with pool.acquire() as provider:
                async for chunk in _stream_generator(provider, req):
                    # Stop producing as soon as the client goes away.
                    if await request.is_disconnected():
                        break
                    yield chunk

        return StreamingResponse(
            _gen(),
            media_type = "text/event-stream",
            headers = {
                "Cache-Control": "no-cache",
                "X-Accel-Buffering": "no",   # disable nginx buffering
                "Connection": "keep-alive",
            },
        )

    # ββ Non-streaming ββββββββββββββββββββββββββββββββββββββ
    messages = [{"role": m.role, "content": m.content} for m in req.messages]
    kwargs: dict = {
        "messages": messages,
        "temperature": req.temperature,
    }
    if req.model:
        kwargs["model"] = req.model
    if req.max_tokens:  # NOTE: max_tokens == 0 is treated as "unset"
        kwargs["max_tokens"] = req.max_tokens
    if req.system:
        kwargs["system"] = req.system

    # get_running_loop(): get_event_loop() is deprecated inside coroutines
    # since Python 3.10; behavior is identical here (a loop is running).
    loop = asyncio.get_running_loop()

    async with pool.acquire() as provider:
        full_parts: list[str] = []

        def _collect():
            # Drain the synchronous generator in a worker thread.
            for chunk in provider.chat(**kwargs):
                full_parts.append(chunk)

        await asyncio.wait_for(
            loop.run_in_executor(None, _collect),
            timeout=STREAM_TIMEOUT,
        )

    response_text = "".join(full_parts)

    return {
        "id": f"chatcmpl-{uuid.uuid4().hex[:20]}",
        "object": "chat.completion",
        "created": int(time.time()),
        "model": req.model,
        "choices": [{
            "index": 0,
            "message": {"role": "assistant", "content": response_text},
            "finish_reason": "stop",
        }],
        "usage": {
            "prompt_tokens": 0,
            "completion_tokens": 0,
            "total_tokens": 0,
        },
    }
|
| 620 |
+
|
| 621 |
+
|
| 622 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
# ENTRY POINT
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
if __name__ == "__main__":
    # Exactly one worker: the provider pool is in-process state and
    # cannot be shared across uvicorn worker processes.
    uvicorn.run(
        "server:app",
        host = HOST,
        port = PORT,
        log_level = "info",
        workers = 1,
        loop = "asyncio",
        timeout_keep_alive = 30,
    )
|