Wire HF Spaces deployment
Browse files
- .dockerignore +25 -0
- .github/workflows/sync-huggingface.yml +53 -0
- .gitignore +4 -1
- Dockerfile +59 -0
- README.md +59 -0
- tiny_vllm/server.py +35 -4
- web/app.js +15 -0
- web/index.html +3 -1
- web/style.css +20 -2
.dockerignore
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
__pycache__/
|
| 2 |
+
*.py[cod]
|
| 3 |
+
*.egg-info/
|
| 4 |
+
.venv/
|
| 5 |
+
venv/
|
| 6 |
+
.git/
|
| 7 |
+
.github/
|
| 8 |
+
.pytest_cache/
|
| 9 |
+
.mypy_cache/
|
| 10 |
+
.ruff_cache/
|
| 11 |
+
tests/
|
| 12 |
+
examples/
|
| 13 |
+
scripts/
|
| 14 |
+
.DS_Store
|
| 15 |
+
*.log
|
| 16 |
+
.cache/
|
| 17 |
+
hf_cache/
|
| 18 |
+
.vscode/
|
| 19 |
+
.idea/
|
| 20 |
+
.claude/
|
| 21 |
+
.cursor/
|
| 22 |
+
.env
|
| 23 |
+
.env.*
|
| 24 |
+
secrets/
|
| 25 |
+
|
.github/workflows/sync-huggingface.yml
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: Sync to Hugging Face Space
|
| 2 |
+
|
| 3 |
+
# Mirrors the repo to your Hugging Face Space on every push to main.
|
| 4 |
+
# HF Spaces then rebuilds the Docker image and redeploys.
|
| 5 |
+
#
|
| 6 |
+
# One-time setup (see README "Hugging Face Space — live demo"):
|
| 7 |
+
# 1. Create the Space at https://huggingface.co/new-space (SDK: Docker).
|
| 8 |
+
# 2. Generate a write-access token at https://huggingface.co/settings/tokens.
|
| 9 |
+
# 3. In the GitHub repo: Settings → Secrets and variables → Actions → add:
|
| 10 |
+
# HF_TOKEN ← the token from step 2
|
| 11 |
+
# HF_USERNAME ← your HF username (e.g. surajsharan)
|
| 12 |
+
# HF_SPACE_NAME ← the Space name (e.g. tiny-vllm)
|
| 13 |
+
# 4. Push to main, or trigger this workflow manually from the Actions tab.
|
| 14 |
+
|
| 15 |
+
on:
|
| 16 |
+
push:
|
| 17 |
+
branches: [main]
|
| 18 |
+
workflow_dispatch:
|
| 19 |
+
|
| 20 |
+
concurrency:
|
| 21 |
+
group: huggingface-sync
|
| 22 |
+
cancel-in-progress: false
|
| 23 |
+
|
| 24 |
+
jobs:
|
| 25 |
+
sync:
|
| 26 |
+
runs-on: ubuntu-latest
|
| 27 |
+
steps:
|
| 28 |
+
- uses: actions/checkout@v4
|
| 29 |
+
with:
|
| 30 |
+
fetch-depth: 0
|
| 31 |
+
lfs: true
|
| 32 |
+
|
| 33 |
+
- name: Push to HF Space
|
| 34 |
+
env:
|
| 35 |
+
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
| 36 |
+
HF_USERNAME: ${{ secrets.HF_USERNAME }}
|
| 37 |
+
HF_SPACE_NAME: ${{ secrets.HF_SPACE_NAME }}
|
| 38 |
+
run: |
|
| 39 |
+
set -e
|
| 40 |
+
if [ -z "$HF_TOKEN" ] || [ -z "$HF_USERNAME" ] || [ -z "$HF_SPACE_NAME" ]; then
|
| 41 |
+
echo "::notice::HF secrets not configured (HF_TOKEN/HF_USERNAME/HF_SPACE_NAME)."
|
| 42 |
+
echo "::notice::Skipping HF Space sync. See README for setup."
|
| 43 |
+
exit 0
|
| 44 |
+
fi
|
| 45 |
+
git config user.email "actions@github.com"
|
| 46 |
+
git config user.name "github-actions[bot]"
|
| 47 |
+
REMOTE="https://${HF_USERNAME}:${HF_TOKEN}@huggingface.co/spaces/${HF_USERNAME}/${HF_SPACE_NAME}"
|
| 48 |
+
git remote add huggingface "$REMOTE"
|
| 49 |
+
# Force-push: the HF Space repo is a mirror of this branch.
|
| 50 |
+
# If you also commit on HF (e.g., README edits in the Space UI),
|
| 51 |
+
# those would be overwritten — keep edits in this repo.
|
| 52 |
+
git push --force huggingface HEAD:main
|
| 53 |
+
echo "::notice::Synced to https://huggingface.co/spaces/${HF_USERNAME}/${HF_SPACE_NAME}"
|
.gitignore
CHANGED
|
@@ -16,4 +16,7 @@ hf_cache/
|
|
| 16 |
.vscode/
|
| 17 |
.idea/
|
| 18 |
.claude/
|
| 19 |
-
.cursor/
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
.vscode/
|
| 17 |
.idea/
|
| 18 |
.claude/
|
| 19 |
+
.cursor/
|
| 20 |
+
.env
|
| 21 |
+
.env.*
|
| 22 |
+
secrets/
|
Dockerfile
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Dockerfile for the Hugging Face Spaces deployment.
|
| 2 |
+
#
|
| 3 |
+
# This image is small enough to fit comfortably in HF's free CPU tier
|
| 4 |
+
# (16 GB RAM, 2 vCPU): CPU-only torch + pre-downloaded Qwen2.5-0.5B.
|
| 5 |
+
#
|
| 6 |
+
# HF Spaces convention: listen on port 7860, bound to 0.0.0.0.
|
| 7 |
+
|
| 8 |
+
FROM python:3.11-slim
|
| 9 |
+
|
| 10 |
+
ENV PYTHONUNBUFFERED=1 \
|
| 11 |
+
PYTHONDONTWRITEBYTECODE=1 \
|
| 12 |
+
PIP_NO_CACHE_DIR=1 \
|
| 13 |
+
PIP_DISABLE_PIP_VERSION_CHECK=1 \
|
| 14 |
+
HF_HOME=/tmp/.cache/huggingface \
|
| 15 |
+
TRANSFORMERS_CACHE=/tmp/.cache/huggingface/transformers \
|
| 16 |
+
TINY_VLLM_MODEL=Qwen/Qwen2.5-0.5B-Instruct \
|
| 17 |
+
TINY_VLLM_DEVICE=cpu \
|
| 18 |
+
TINY_VLLM_DTYPE=float32
|
| 19 |
+
|
| 20 |
+
WORKDIR /app
|
| 21 |
+
|
| 22 |
+
# Minimal system deps: curl for healthcheck, ca-certs for HTTPS.
|
| 23 |
+
RUN apt-get update && apt-get install -y --no-install-recommends \
|
| 24 |
+
curl ca-certificates \
|
| 25 |
+
&& rm -rf /var/lib/apt/lists/*
|
| 26 |
+
|
| 27 |
+
# Install CPU-only PyTorch first (much smaller than the default GPU build).
|
| 28 |
+
RUN pip install --upgrade pip \
|
| 29 |
+
&& pip install torch --index-url https://download.pytorch.org/whl/cpu
|
| 30 |
+
|
| 31 |
+
# Install the rest of our deps (skip torch — already done).
|
| 32 |
+
COPY requirements.txt .
|
| 33 |
+
RUN grep -v '^torch' requirements.txt > requirements.no-torch.txt \
|
| 34 |
+
&& pip install -r requirements.no-torch.txt
|
| 35 |
+
|
| 36 |
+
# Pre-download the model so cold-start latency is just engine warmup.
|
| 37 |
+
# Failing this step at build time is better than failing on first request.
|
| 38 |
+
RUN python -c "import os; m=os.environ['TINY_VLLM_MODEL']; \
|
| 39 |
+
from transformers import AutoTokenizer, AutoModelForCausalLM; \
|
| 40 |
+
AutoTokenizer.from_pretrained(m); \
|
| 41 |
+
AutoModelForCausalLM.from_pretrained(m); \
|
| 42 |
+
print(f'pre-fetched {m}')"
|
| 43 |
+
|
| 44 |
+
# Now copy the source (placed AFTER the heavy deps so layer cache helps reruns).
|
| 45 |
+
COPY tiny_vllm/ ./tiny_vllm/
|
| 46 |
+
COPY web/ ./web/
|
| 47 |
+
COPY README.md LICENSE pyproject.toml ./
|
| 48 |
+
|
| 49 |
+
EXPOSE 7860
|
| 50 |
+
|
| 51 |
+
HEALTHCHECK --interval=30s --timeout=5s --start-period=120s \
|
| 52 |
+
CMD curl -fsS http://localhost:7860/health || exit 1
|
| 53 |
+
|
| 54 |
+
# Conservative resource settings — HF free CPU is small.
|
| 55 |
+
CMD ["python", "-m", "tiny_vllm.server", \
|
| 56 |
+
"--host", "0.0.0.0", "--port", "7860", \
|
| 57 |
+
"--block-size", "16", "--num-blocks", "128", \
|
| 58 |
+
"--max-num-seqs", "4", "--max-num-batched-tokens", "256", \
|
| 59 |
+
"--max-model-len", "1024"]
|
README.md
CHANGED
|
@@ -1,3 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
# tiny_vllm
|
| 2 |
|
| 3 |
A **minimal continuous-batching LLM engine** built to be read end-to-end. It
|
|
@@ -76,6 +87,54 @@ pip install pytest
|
|
| 76 |
python -m pytest tests/
|
| 77 |
```
|
| 78 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 79 |
## GitHub Pages demo (replay mode)
|
| 80 |
|
| 81 |
The visualization can run as a **static page** on GitHub Pages with no
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: tiny_vllm
|
| 3 |
+
emoji: 🪶
|
| 4 |
+
colorFrom: gray
|
| 5 |
+
colorTo: green
|
| 6 |
+
sdk: docker
|
| 7 |
+
app_port: 7860
|
| 8 |
+
pinned: false
|
| 9 |
+
short_description: Minimal continuous-batching LLM engine — paged KV, prefix caching, SSE
|
| 10 |
+
---
|
| 11 |
+
|
| 12 |
# tiny_vllm
|
| 13 |
|
| 14 |
A **minimal continuous-batching LLM engine** built to be read end-to-end. It
|
|
|
|
| 87 |
python -m pytest tests/
|
| 88 |
```
|
| 89 |
|
| 90 |
+
## Hugging Face Space — live demo
|
| 91 |
+
|
| 92 |
+
For a *live* (not recorded) demo you can talk to from any browser, deploy this
|
| 93 |
+
repo as a Docker-based Hugging Face Space. HF's free CPU tier (16 GB RAM,
|
| 94 |
+
2 vCPU) fits Qwen2.5-0.5B comfortably.
|
| 95 |
+
|
| 96 |
+
**One-time setup:**
|
| 97 |
+
|
| 98 |
+
1. **Create the Space.** Go to [huggingface.co/new-space](https://huggingface.co/new-space):
|
| 99 |
+
- Owner: your HF username
|
| 100 |
+
- Space name: e.g. `tiny-vllm` (must match `HF_SPACE_NAME` below)
|
| 101 |
+
- SDK: **Docker**
|
| 102 |
+
- License: MIT
|
| 103 |
+
2. **Generate a write-access token** at
|
| 104 |
+
[huggingface.co/settings/tokens](https://huggingface.co/settings/tokens) → New
|
| 105 |
+
token → role **Write**.
|
| 106 |
+
3. **Add three secrets** to this GitHub repo (Settings → Secrets and variables
|
| 107 |
+
→ Actions → New repository secret):
|
| 108 |
+
- `HF_TOKEN` — the token from step 2
|
| 109 |
+
- `HF_USERNAME` — your HF username
|
| 110 |
+
- `HF_SPACE_NAME` — e.g. `tiny-vllm`
|
| 111 |
+
|
| 112 |
+
On the next push to `main`, the `Sync to Hugging Face Space` workflow mirrors
|
| 113 |
+
the repo to the Space. HF then builds the Docker image (~3–5 min on first
|
| 114 |
+
build because of the pre-fetched model) and the Space goes live at:
|
| 115 |
+
|
| 116 |
+
```
|
| 117 |
+
https://<lowercased-HF_USERNAME>-<HF_SPACE_NAME>.hf.space
|
| 118 |
+
```
|
| 119 |
+
|
| 120 |
+
(HF normalises subdomains to lowercase — `enCoder/tiny-vllm` becomes
|
| 121 |
+
`encoder-tiny-vllm.hf.space`.)
|
| 122 |
+
|
| 123 |
+
The GitHub Pages demo links to this URL as a **"try live ↗"** pill in the
|
| 124 |
+
topbar — update `data-hf-space` on `<body>` in `web/index.html` if your
|
| 125 |
+
Space URL differs.
|
| 126 |
+
|
| 127 |
+
**HF Spaces cost: free.** Cold-start (after ~48 h of inactivity) takes ~30 s
|
| 128 |
+
while the container wakes; subsequent requests are warm.
|
| 129 |
+
|
| 130 |
+
**Files involved:**
|
| 131 |
+
- `Dockerfile` — CPU-only torch, pre-downloads the model at build time.
|
| 132 |
+
- `README.md` frontmatter — HF reads `sdk: docker`, `app_port: 7860`, etc.
|
| 133 |
+
- `.github/workflows/sync-huggingface.yml` — mirrors GitHub → HF Spaces.
|
| 134 |
+
- CORS is enabled on the server so the GH Pages frontend can call the HF
|
| 135 |
+
backend cross-origin (`?mode=live&backend=https://...hf.space` is a
|
| 136 |
+
potential future addition).
|
| 137 |
+
|
| 138 |
## GitHub Pages demo (replay mode)
|
| 139 |
|
| 140 |
The visualization can run as a **static page** on GitHub Pages with no
|
tiny_vllm/server.py
CHANGED
|
@@ -23,6 +23,7 @@ from pathlib import Path
|
|
| 23 |
from typing import AsyncIterator, Optional
|
| 24 |
|
| 25 |
from fastapi import FastAPI, HTTPException, Request
|
|
|
|
| 26 |
from fastapi.responses import FileResponse, JSONResponse, StreamingResponse
|
| 27 |
from fastapi.staticfiles import StaticFiles
|
| 28 |
from pydantic import BaseModel, Field
|
|
@@ -70,8 +71,18 @@ def _sse(data: dict | str) -> bytes:
|
|
| 70 |
return f"data: {data}\n\n".encode("utf-8")
|
| 71 |
|
| 72 |
|
| 73 |
-
def build_app(config: EngineConfig) -> FastAPI:
|
| 74 |
app = FastAPI(title="tiny_vllm", version="0.1.0")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 75 |
engine = LLMEngine(config)
|
| 76 |
|
| 77 |
@app.on_event("startup")
|
|
@@ -114,6 +125,14 @@ def build_app(config: EngineConfig) -> FastAPI:
|
|
| 114 |
|
| 115 |
# ---- introspection -------------------------------------------------
|
| 116 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
@app.get("/engine/snapshot")
|
| 118 |
async def snapshot() -> dict:
|
| 119 |
return engine.snapshot()
|
|
@@ -300,8 +319,14 @@ def main() -> None:
|
|
| 300 |
parser.add_argument("--record", default=None,
|
| 301 |
help="Append every engine event to this JSONL file "
|
| 302 |
"(e.g. web/events.jsonl) to power the static replay demo.")
|
| 303 |
-
parser.add_argument("--host", default="0.0.0.0")
|
| 304 |
-
parser.add_argument("--port", type=int,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 305 |
args = parser.parse_args()
|
| 306 |
|
| 307 |
cfg = EngineConfig(
|
|
@@ -317,8 +342,14 @@ def main() -> None:
|
|
| 317 |
record_path=args.record,
|
| 318 |
)
|
| 319 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 320 |
import uvicorn
|
| 321 |
-
app = build_app(cfg)
|
| 322 |
uvicorn.run(app, host=args.host, port=args.port, log_level="info")
|
| 323 |
|
| 324 |
|
|
|
|
| 23 |
from typing import AsyncIterator, Optional
|
| 24 |
|
| 25 |
from fastapi import FastAPI, HTTPException, Request
|
| 26 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 27 |
from fastapi.responses import FileResponse, JSONResponse, StreamingResponse
|
| 28 |
from fastapi.staticfiles import StaticFiles
|
| 29 |
from pydantic import BaseModel, Field
|
|
|
|
| 71 |
return f"data: {data}\n\n".encode("utf-8")
|
| 72 |
|
| 73 |
|
| 74 |
+
def build_app(config: EngineConfig, cors_allow_origins: Optional[list[str]] = None) -> FastAPI:
|
| 75 |
app = FastAPI(title="tiny_vllm", version="0.1.0")
|
| 76 |
+
# CORS so the GH-Pages frontend (or any external page) can call this
|
| 77 |
+
# backend when it's deployed on HF Spaces. Read-only inference, so the
|
| 78 |
+
# blast radius of opening this up is small.
|
| 79 |
+
app.add_middleware(
|
| 80 |
+
CORSMiddleware,
|
| 81 |
+
allow_origins=cors_allow_origins or ["*"],
|
| 82 |
+
allow_credentials=False,
|
| 83 |
+
allow_methods=["*"],
|
| 84 |
+
allow_headers=["*"],
|
| 85 |
+
)
|
| 86 |
engine = LLMEngine(config)
|
| 87 |
|
| 88 |
@app.on_event("startup")
|
|
|
|
| 125 |
|
| 126 |
# ---- introspection -------------------------------------------------
|
| 127 |
|
| 128 |
+
@app.get("/health")
|
| 129 |
+
async def health() -> dict:
|
| 130 |
+
return {
|
| 131 |
+
"status": "ok" if engine.model_runner is not None else "starting",
|
| 132 |
+
"model": config.model,
|
| 133 |
+
"device": config.device,
|
| 134 |
+
}
|
| 135 |
+
|
| 136 |
@app.get("/engine/snapshot")
|
| 137 |
async def snapshot() -> dict:
|
| 138 |
return engine.snapshot()
|
|
|
|
| 319 |
parser.add_argument("--record", default=None,
|
| 320 |
help="Append every engine event to this JSONL file "
|
| 321 |
"(e.g. web/events.jsonl) to power the static replay demo.")
|
| 322 |
+
parser.add_argument("--host", default=os.environ.get("HOST", "0.0.0.0"))
|
| 323 |
+
parser.add_argument("--port", type=int,
|
| 324 |
+
default=int(os.environ.get("PORT", "8000")))
|
| 325 |
+
parser.add_argument(
|
| 326 |
+
"--cors-origins", default=os.environ.get("TINY_VLLM_CORS_ORIGINS", "*"),
|
| 327 |
+
help="Comma-separated allowed origins for CORS (default '*' — fine "
|
| 328 |
+
"for the demo since this server is read-only inference).",
|
| 329 |
+
)
|
| 330 |
args = parser.parse_args()
|
| 331 |
|
| 332 |
cfg = EngineConfig(
|
|
|
|
| 342 |
record_path=args.record,
|
| 343 |
)
|
| 344 |
|
| 345 |
+
cors_origins: list[str] | None
|
| 346 |
+
if args.cors_origins.strip() in ("*", ""):
|
| 347 |
+
cors_origins = None # defaults to ["*"]
|
| 348 |
+
else:
|
| 349 |
+
cors_origins = [o.strip() for o in args.cors_origins.split(",") if o.strip()]
|
| 350 |
+
|
| 351 |
import uvicorn
|
| 352 |
+
app = build_app(cfg, cors_allow_origins=cors_origins)
|
| 353 |
uvicorn.run(app, host=args.host, port=args.port, log_level="info")
|
| 354 |
|
| 355 |
|
web/app.js
CHANGED
|
@@ -467,6 +467,21 @@ if (ui.playPause) ui.playPause.addEventListener("click", () => {
|
|
| 467 |
});
|
| 468 |
if (ui.restart) ui.restart.addEventListener("click", () => state.replay?.start());
|
| 469 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 470 |
// ---------- entry point ----------
|
| 471 |
|
| 472 |
(function boot() {
|
|
|
|
| 467 |
});
|
| 468 |
if (ui.restart) ui.restart.addEventListener("click", () => state.replay?.start());
|
| 469 |
|
| 470 |
+
// ---------- "Try live" link → Hugging Face Space ----------
|
| 471 |
+
|
| 472 |
+
(function setupHFLink() {
|
| 473 |
+
const url = document.body.getAttribute("data-hf-space") || "";
|
| 474 |
+
// Don't advertise the live link if we're already on it (avoids
|
| 475 |
+
// showing "try live →" while on the live page).
|
| 476 |
+
const onHF = /\.hf\.space$/i.test(location.hostname) ||
|
| 477 |
+
/huggingface\.co$/i.test(location.hostname);
|
| 478 |
+
if (!url || onHF) return;
|
| 479 |
+
const top = document.getElementById("hf-live");
|
| 480 |
+
if (top) { top.href = url; top.style.display = ""; }
|
| 481 |
+
const rl = document.getElementById("rl-hf");
|
| 482 |
+
if (rl) { rl.href = url; rl.style.display = ""; }
|
| 483 |
+
})();
|
| 484 |
+
|
| 485 |
// ---------- entry point ----------
|
| 486 |
|
| 487 |
(function boot() {
|
web/index.html
CHANGED
|
@@ -10,7 +10,7 @@
|
|
| 10 |
<link rel="stylesheet" href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&family=JetBrains+Mono:wght@400;500;600&display=swap">
|
| 11 |
<link rel="stylesheet" href="style.css">
|
| 12 |
</head>
|
| 13 |
-
<body>
|
| 14 |
|
| 15 |
<nav class="topbar">
|
| 16 |
<div class="crumb">
|
|
@@ -21,6 +21,7 @@
|
|
| 21 |
<span>tiny_vllm</span>
|
| 22 |
</div>
|
| 23 |
<div class="right">
|
|
|
|
| 24 |
<a href="https://github.com/surajsharan/tiny_vLLM" target="_blank" rel="noopener">github ↗</a>
|
| 25 |
<a href="https://surajsharan.github.io/" title="Back to portfolio">← back</a>
|
| 26 |
</div>
|
|
@@ -67,6 +68,7 @@ python -m tiny_vllm.server --model Qwen/Qwen2.5-0.5B-Instruct
|
|
| 67 |
# then open http://localhost:8000</code></pre>
|
| 68 |
<div class="rl-foot">
|
| 69 |
<button id="rl-copy" class="ghost">Copy command</button>
|
|
|
|
| 70 |
<button id="rl-close" class="ghost">Dismiss</button>
|
| 71 |
</div>
|
| 72 |
</div>
|
|
|
|
| 10 |
<link rel="stylesheet" href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&family=JetBrains+Mono:wght@400;500;600&display=swap">
|
| 11 |
<link rel="stylesheet" href="style.css">
|
| 12 |
</head>
|
| 13 |
+
<body data-hf-space="https://encoder-tiny-vllm.hf.space">
|
| 14 |
|
| 15 |
<nav class="topbar">
|
| 16 |
<div class="crumb">
|
|
|
|
| 21 |
<span>tiny_vllm</span>
|
| 22 |
</div>
|
| 23 |
<div class="right">
|
| 24 |
+
<a id="hf-live" href="#" target="_blank" rel="noopener" class="live-link" style="display:none">try live ↗</a>
|
| 25 |
<a href="https://github.com/surajsharan/tiny_vLLM" target="_blank" rel="noopener">github ↗</a>
|
| 26 |
<a href="https://surajsharan.github.io/" title="Back to portfolio">← back</a>
|
| 27 |
</div>
|
|
|
|
| 68 |
# then open http://localhost:8000</code></pre>
|
| 69 |
<div class="rl-foot">
|
| 70 |
<button id="rl-copy" class="ghost">Copy command</button>
|
| 71 |
+
<a id="rl-hf" class="rl-cta" href="#" target="_blank" rel="noopener" style="display:none">Or use the hosted live demo ↗</a>
|
| 72 |
<button id="rl-close" class="ghost">Dismiss</button>
|
| 73 |
</div>
|
| 74 |
</div>
|
web/style.css
CHANGED
|
@@ -65,9 +65,17 @@ body {
|
|
| 65 |
.topbar a:hover { color: var(--accent); }
|
| 66 |
.topbar .crumb { color: var(--muted); }
|
| 67 |
.topbar .crumb .sep { margin: 0 6px; }
|
| 68 |
-
.topbar .right { display: flex; gap:
|
| 69 |
.topbar .right a { color: var(--muted); }
|
| 70 |
.topbar .right a:hover { color: var(--accent); }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 71 |
|
| 72 |
header.hero {
|
| 73 |
padding: 28px 24px 20px;
|
|
@@ -231,7 +239,17 @@ textarea:disabled { opacity: 0.5; }
|
|
| 231 |
color: var(--accent);
|
| 232 |
overflow-x: auto;
|
| 233 |
}
|
| 234 |
-
.run-locally .rl-foot { display: flex; gap: 8px; }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 235 |
|
| 236 |
.replay-controls { margin-left: auto; display: flex; gap: 6px; align-items: center; }
|
| 237 |
.replay-controls select {
|
|
|
|
| 65 |
.topbar a:hover { color: var(--accent); }
|
| 66 |
.topbar .crumb { color: var(--muted); }
|
| 67 |
.topbar .crumb .sep { margin: 0 6px; }
|
| 68 |
+
.topbar .right { display: flex; gap: 14px; align-items: center; }
|
| 69 |
.topbar .right a { color: var(--muted); }
|
| 70 |
.topbar .right a:hover { color: var(--accent); }
|
| 71 |
+
.topbar .right a.live-link {
|
| 72 |
+
color: var(--bg);
|
| 73 |
+
background: var(--accent);
|
| 74 |
+
padding: 3px 10px;
|
| 75 |
+
border-radius: 999px;
|
| 76 |
+
font-weight: 600;
|
| 77 |
+
}
|
| 78 |
+
.topbar .right a.live-link:hover { background: var(--accent-dim); color: var(--bg); }
|
| 79 |
|
| 80 |
header.hero {
|
| 81 |
padding: 28px 24px 20px;
|
|
|
|
| 239 |
color: var(--accent);
|
| 240 |
overflow-x: auto;
|
| 241 |
}
|
| 242 |
+
.run-locally .rl-foot { display: flex; gap: 8px; align-items: center; flex-wrap: wrap; }
|
| 243 |
+
.run-locally .rl-cta {
|
| 244 |
+
margin-left: auto;
|
| 245 |
+
color: var(--bg);
|
| 246 |
+
background: var(--accent);
|
| 247 |
+
padding: 6px 12px;
|
| 248 |
+
border-radius: var(--radius);
|
| 249 |
+
font-family: var(--mono); font-size: 12px; font-weight: 600;
|
| 250 |
+
text-decoration: none;
|
| 251 |
+
}
|
| 252 |
+
.run-locally .rl-cta:hover { background: var(--accent-dim); }
|
| 253 |
|
| 254 |
.replay-controls { margin-left: auto; display: flex; gap: 6px; align-items: center; }
|
| 255 |
.replay-controls select {
|