Spaces:
Sleeping
Sleeping
Commit ·
f173e0f
1
Parent(s): a3090ac
wip script and monorepo
Browse files- .cursor/plans/hf_space_deploy_review_a7f8b3c3.plan.md +3 -3
- .env.example +12 -0
- .gitignore +11 -0
- .python-version +1 -0
- Dockerfile +32 -0
- README.md +101 -1
- pyproject.toml +26 -0
- scripts/download_model.py +46 -0
.cursor/plans/hf_space_deploy_review_a7f8b3c3.plan.md
CHANGED
|
@@ -4,13 +4,13 @@ overview: "The existing uv monorepo plan is the right foundation for a Build Sma
|
|
| 4 |
todos:
|
| 5 |
- id: fix-readme-yaml
|
| 6 |
content: "Put HF Space YAML frontmatter (sdk: docker, app_port: 7860) in root README.md, not only apps/gradio-space/README.md"
|
| 7 |
-
status:
|
| 8 |
- id: phase1-bootstrap
|
| 9 |
content: "Phase 1: uv workspace + inference lib (llama_cpp only) + minimal gr.ChatInterface app"
|
| 10 |
-
status:
|
| 11 |
- id: phase1-docker
|
| 12 |
content: "Phase 1: root Dockerfile (uv sync, UID 1000, port 7860) and create Space under build-small-hackathon"
|
| 13 |
-
status:
|
| 14 |
- id: phase1-verify
|
| 15 |
content: "Phase 1: local uv sync + Gradio smoke test + confirm Space builds on CPU basic"
|
| 16 |
status: pending
|
|
|
|
| 4 |
todos:
|
| 5 |
- id: fix-readme-yaml
|
| 6 |
content: "Put HF Space YAML frontmatter (sdk: docker, app_port: 7860) in root README.md, not only apps/gradio-space/README.md"
|
| 7 |
+
status: completed
|
| 8 |
- id: phase1-bootstrap
|
| 9 |
content: "Phase 1: uv workspace + inference lib (llama_cpp only) + minimal gr.ChatInterface app"
|
| 10 |
+
status: completed
|
| 11 |
- id: phase1-docker
|
| 12 |
content: "Phase 1: root Dockerfile (uv sync, UID 1000, port 7860) and create Space under build-small-hackathon"
|
| 13 |
+
status: in_progress
|
| 14 |
- id: phase1-verify
|
| 15 |
content: "Phase 1: local uv sync + Gradio smoke test + confirm Space builds on CPU basic"
|
| 16 |
status: pending
|
.env.example
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
INFERENCE_BACKEND=llama_cpp
|
| 2 |
+
MODEL_REPO=Qwen/Qwen2.5-3B-Instruct-GGUF
|
| 3 |
+
MODEL_FILE=qwen2.5-3b-instruct-q4_k_m.gguf
|
| 4 |
+
N_CTX=4096
|
| 5 |
+
N_GPU_LAYERS=0
|
| 6 |
+
|
| 7 |
+
# Optional: local GGUF path instead of Hub download
|
| 8 |
+
# MODEL_PATH=./models/qwen2.5-3b-instruct-q4_k_m.gguf
|
| 9 |
+
|
| 10 |
+
# Optional: transformers backend (requires inference[transformers] extra)
|
| 11 |
+
# INFERENCE_BACKEND=transformers
|
| 12 |
+
# MODEL_ID=Qwen/Qwen2.5-3B-Instruct
|
.gitignore
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
.venv/
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.py[cod]
|
| 4 |
+
.env
|
| 5 |
+
models/
|
| 6 |
+
*.gguf
|
| 7 |
+
.ruff_cache/
|
| 8 |
+
.pytest_cache/
|
| 9 |
+
*.egg-info/
|
| 10 |
+
dist/
|
| 11 |
+
build/
|
.python-version
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
3.12
|
Dockerfile
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM python:3.12-slim
|
| 2 |
+
|
| 3 |
+
ENV PYTHONUNBUFFERED=1 \
|
| 4 |
+
UV_COMPILE_BYTECODE=1 \
|
| 5 |
+
UV_LINK_MODE=copy
|
| 6 |
+
|
| 7 |
+
RUN apt-get update && apt-get install -y --no-install-recommends \
|
| 8 |
+
build-essential \
|
| 9 |
+
cmake \
|
| 10 |
+
&& rm -rf /var/lib/apt/lists/*
|
| 11 |
+
|
| 12 |
+
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
|
| 13 |
+
|
| 14 |
+
WORKDIR /app
|
| 15 |
+
|
| 16 |
+
COPY pyproject.toml uv.lock .python-version ./
|
| 17 |
+
COPY apps/gradio-space/pyproject.toml apps/gradio-space/
|
| 18 |
+
COPY libs/inference/pyproject.toml libs/inference/
|
| 19 |
+
COPY apps/gradio-space/src apps/gradio-space/src
|
| 20 |
+
COPY libs/inference/src libs/inference/src
|
| 21 |
+
|
| 22 |
+
RUN useradd -m -u 1000 user && \
|
| 23 |
+
uv sync --frozen --no-dev --package gradio-space && \
|
| 24 |
+
chown -R user:user /app
|
| 25 |
+
|
| 26 |
+
USER user
|
| 27 |
+
ENV HOME=/home/user \
|
| 28 |
+
PATH="/app/.venv/bin:$PATH"
|
| 29 |
+
|
| 30 |
+
EXPOSE 7860
|
| 31 |
+
|
| 32 |
+
CMD ["uv", "run", "--package", "gradio-space", "python", "-m", "gradio_space.app"]
|
README.md
CHANGED
|
@@ -1 +1,101 @@
|
|
| 1 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: Small Model Hackathon
|
| 3 |
+
emoji: 🦙
|
| 4 |
+
colorFrom: blue
|
| 5 |
+
colorTo: green
|
| 6 |
+
sdk: docker
|
| 7 |
+
app_port: 7860
|
| 8 |
+
pinned: false
|
| 9 |
+
license: apache-2.0
|
| 10 |
+
---
|
| 11 |
+
|
| 12 |
+
# Small Model Hackathon
|
| 13 |
+
|
| 14 |
+
Gradio chat Space for the [Build Small Hackathon](https://huggingface.co/build-small-hackathon). Runs local inference with **llama.cpp** (GGUF) by default; optional **transformers** backend via env.
|
| 15 |
+
|
| 16 |
+
## Prerequisites
|
| 17 |
+
|
| 18 |
+
- [uv](https://docs.astral.sh/uv/)
|
| 19 |
+
- Python 3.12
|
| 20 |
+
|
| 21 |
+
## Quick start
|
| 22 |
+
|
| 23 |
+
```bash
|
| 24 |
+
uv sync --all-packages
|
| 25 |
+
cp .env.example .env # optional: edit model settings
|
| 26 |
+
|
| 27 |
+
# Download GGUF for offline dev (optional)
|
| 28 |
+
uv run python scripts/download_model.py
|
| 29 |
+
|
| 30 |
+
# Run Gradio locally
|
| 31 |
+
uv run --package gradio-space python -m gradio_space.app
|
| 32 |
+
```
|
| 33 |
+
|
| 34 |
+
Open http://localhost:7860. The model downloads from Hugging Face Hub on the first chat message (or set `MODEL_PATH` to a local GGUF).
|
| 35 |
+
|
| 36 |
+
## Environment variables
|
| 37 |
+
|
| 38 |
+
| Variable | Default | Description |
|
| 39 |
+
|----------|---------|-------------|
|
| 40 |
+
| `INFERENCE_BACKEND` | `llama_cpp` | `llama_cpp` or `transformers` |
|
| 41 |
+
| `MODEL_REPO` | `Qwen/Qwen2.5-3B-Instruct-GGUF` | Hub repo for GGUF |
|
| 42 |
+
| `MODEL_FILE` | `qwen2.5-3b-instruct-q4_k_m.gguf` | GGUF filename |
|
| 43 |
+
| `MODEL_PATH` | — | Local GGUF path (skips Hub download) |
|
| 44 |
+
| `N_CTX` | `4096` | Context window |
|
| 45 |
+
| `N_GPU_LAYERS` | `0` | GPU layers for llama.cpp (0 = CPU) |
|
| 46 |
+
| `MODEL_ID` | `Qwen/Qwen2.5-3B-Instruct` | Used when `INFERENCE_BACKEND=transformers` |
|
| 47 |
+
|
| 48 |
+
See [`.env.example`](.env.example) for a full template.
|
| 49 |
+
|
| 50 |
+
## Monorepo layout
|
| 51 |
+
|
| 52 |
+
```text
|
| 53 |
+
apps/gradio-space/ # Gradio UI (HF Space entrypoint)
|
| 54 |
+
libs/inference/ # Swappable inference backends
|
| 55 |
+
scripts/ # Dev utilities
|
| 56 |
+
```
|
| 57 |
+
|
| 58 |
+
### Common commands
|
| 59 |
+
|
| 60 |
+
```bash
|
| 61 |
+
uv add --package gradio-space <package>
|
| 62 |
+
uv add --package inference <package>
|
| 63 |
+
uv run --package gradio-space python -m gradio_space.app
|
| 64 |
+
uv run python -c "from inference.factory import get_backend"
|
| 65 |
+
```
|
| 66 |
+
|
| 67 |
+
## Hugging Face Space deployment
|
| 68 |
+
|
| 69 |
+
1. Create a Space under [build-small-hackathon](https://huggingface.co/build-small-hackathon) with **Docker** SDK.
|
| 70 |
+
2. Link this repository (root `Dockerfile` + root `README.md` YAML above).
|
| 71 |
+
3. Hardware: start with **CPU basic**; upgrade to GPU if you set `N_GPU_LAYERS > 0`.
|
| 72 |
+
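4. Add Space variables (Settings → Variables and secrets): `MODEL_REPO`, `MODEL_FILE`, `N_CTX`, `N_GPU_LAYERS`. None of these values are sensitive, so they do not need to be stored as secrets.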
|
| 73 |
+
|
| 74 |
+
```bash
|
| 75 |
+
# Optional local Docker smoke test
|
| 76 |
+
docker build -t hackathon-space .
|
| 77 |
+
docker run --rm -p 7860:7860 -e MODEL_REPO=Qwen/Qwen2.5-3B-Instruct-GGUF hackathon-space
|
| 78 |
+
```
|
| 79 |
+
|
| 80 |
+
## Hackathon checklist
|
| 81 |
+
|
| 82 |
+
- [ ] Choose a track (Backyard AI or Thousand Token Wood)
|
| 83 |
+
- [ ] Space live under build-small-hackathon
|
| 84 |
+
- [ ] Demo video recorded
|
| 85 |
+
- [ ] Social post published
|
| 86 |
+
- [ ] Submission locked in by **June 15, 2026**
|
| 87 |
+
|
| 88 |
+
### Badge targets
|
| 89 |
+
|
| 90 |
+
- **Off-the-Grid** — local llama.cpp inference (default setup)
|
| 91 |
+
- **Llama Champion** — llama.cpp + GGUF model
|
| 92 |
+
- **Off-Brand** — custom UI via `gr.Server` (Phase 2)
|
| 93 |
+
- **Sharing is Caring** — agent traces dataset (Phase 2)
|
| 94 |
+
|
| 95 |
+
## Transformers backend (optional)
|
| 96 |
+
|
| 97 |
+
```bash
|
| 98 |
+
uv sync --package inference --extra transformers
|
| 99 |
+
INFERENCE_BACKEND=transformers MODEL_ID=Qwen/Qwen2.5-3B-Instruct \
|
| 100 |
+
uv run --package gradio-space python -m gradio_space.app
|
| 101 |
+
```
|
pyproject.toml
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[project]
|
| 2 |
+
name = "small-model-hackathon"
|
| 3 |
+
version = "0.1.0"
|
| 4 |
+
description = "Build Small Hackathon — Gradio Space with local llama.cpp inference"
|
| 5 |
+
readme = "README.md"
|
| 6 |
+
requires-python = ">=3.12"
|
| 7 |
+
dependencies = [
|
| 8 |
+
"gradio-space",
|
| 9 |
+
"inference",
|
| 10 |
+
]
|
| 11 |
+
|
| 12 |
+
[dependency-groups]
|
| 13 |
+
dev = [
|
| 14 |
+
"ruff>=0.9.0",
|
| 15 |
+
"pytest>=8.0.0",
|
| 16 |
+
]
|
| 17 |
+
|
| 18 |
+
[tool.uv.workspace]
|
| 19 |
+
members = [
|
| 20 |
+
"apps/*",
|
| 21 |
+
"libs/*",
|
| 22 |
+
]
|
| 23 |
+
|
| 24 |
+
[tool.uv.sources]
|
| 25 |
+
gradio-space = { workspace = true }
|
| 26 |
+
inference = { workspace = true }
|
scripts/download_model.py
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""Download the configured GGUF model from Hugging Face Hub for offline dev."""
|
| 3 |
+
|
| 4 |
+
from __future__ import annotations
|
| 5 |
+
|
| 6 |
+
import argparse
|
| 7 |
+
import os
|
| 8 |
+
from pathlib import Path
|
| 9 |
+
|
| 10 |
+
from huggingface_hub import hf_hub_download
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
def main() -> None:
    """Download the configured GGUF file from the Hugging Face Hub.

    Command-line options (each falls back to the listed default):

    * ``--repo``: Hub repository id; defaults to the ``MODEL_REPO``
      environment variable, else ``Qwen/Qwen2.5-3B-Instruct-GGUF``.
    * ``--file``: GGUF filename inside the repo; defaults to the
      ``MODEL_FILE`` environment variable, else
      ``qwen2.5-3b-instruct-q4_k_m.gguf``.
    * ``--output-dir``: destination directory; defaults to ``models``.

    The destination directory is created if missing, the file is fetched
    with ``hf_hub_download`` and the resulting path is printed together
    with a hint on how to point ``MODEL_PATH`` at it.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "--repo",
        default=os.environ.get("MODEL_REPO", "Qwen/Qwen2.5-3B-Instruct-GGUF"),
        help="Hugging Face repo containing the GGUF file",
    )
    parser.add_argument(
        "--file",
        default=os.environ.get("MODEL_FILE", "qwen2.5-3b-instruct-q4_k_m.gguf"),
        help="GGUF filename inside the repo",
    )
    parser.add_argument(
        "--output-dir",
        type=Path,
        default=Path("models"),
        help="Directory to copy/symlink the downloaded model into",
    )
    args = parser.parse_args()

    args.output_dir.mkdir(parents=True, exist_ok=True)

    # `local_dir_use_symlinks` is intentionally not passed: it is deprecated
    # in huggingface_hub and ignored when `local_dir` is given (the file is
    # written into `local_dir` directly), so passing it only triggers a
    # deprecation warning.
    # NOTE(review): newer major releases may reject the keyword outright -
    # confirm against the pinned huggingface_hub version.
    path = hf_hub_download(
        repo_id=args.repo,
        filename=args.file,
        local_dir=args.output_dir,
    )
    print(f"Model ready at: {path}")
    print(f"Set MODEL_PATH={path} to use this file directly.")


if __name__ == "__main__":
    main()
|