MSGEncrypted committed on
Commit
f173e0f
·
1 Parent(s): a3090ac

wip script and monorepo

Browse files
.cursor/plans/hf_space_deploy_review_a7f8b3c3.plan.md CHANGED
@@ -4,13 +4,13 @@ overview: "The existing uv monorepo plan is the right foundation for a Build Sma
4
  todos:
5
  - id: fix-readme-yaml
6
  content: "Put HF Space YAML frontmatter (sdk: docker, app_port: 7860) in root README.md, not only apps/gradio-space/README.md"
7
- status: pending
8
  - id: phase1-bootstrap
9
  content: "Phase 1: uv workspace + inference lib (llama_cpp only) + minimal gr.ChatInterface app"
10
- status: pending
11
  - id: phase1-docker
12
  content: "Phase 1: root Dockerfile (uv sync, UID 1000, port 7860) and create Space under build-small-hackathon"
13
- status: pending
14
  - id: phase1-verify
15
  content: "Phase 1: local uv sync + Gradio smoke test + confirm Space builds on CPU basic"
16
  status: pending
 
4
  todos:
5
  - id: fix-readme-yaml
6
  content: "Put HF Space YAML frontmatter (sdk: docker, app_port: 7860) in root README.md, not only apps/gradio-space/README.md"
7
+ status: completed
8
  - id: phase1-bootstrap
9
  content: "Phase 1: uv workspace + inference lib (llama_cpp only) + minimal gr.ChatInterface app"
10
+ status: completed
11
  - id: phase1-docker
12
  content: "Phase 1: root Dockerfile (uv sync, UID 1000, port 7860) and create Space under build-small-hackathon"
13
+ status: in_progress
14
  - id: phase1-verify
15
  content: "Phase 1: local uv sync + Gradio smoke test + confirm Space builds on CPU basic"
16
  status: pending
.env.example ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ INFERENCE_BACKEND=llama_cpp
2
+ MODEL_REPO=Qwen/Qwen2.5-3B-Instruct-GGUF
3
+ MODEL_FILE=qwen2.5-3b-instruct-q4_k_m.gguf
4
+ N_CTX=4096
5
+ N_GPU_LAYERS=0
6
+
7
+ # Optional: local GGUF path instead of Hub download
8
+ # MODEL_PATH=./models/qwen2.5-3b-instruct-q4_k_m.gguf
9
+
10
+ # Optional: transformers backend (requires inference[transformers] extra)
11
+ # INFERENCE_BACKEND=transformers
12
+ # MODEL_ID=Qwen/Qwen2.5-3B-Instruct
.gitignore ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ .venv/
2
+ __pycache__/
3
+ *.py[cod]
4
+ .env
5
+ models/
6
+ *.gguf
7
+ .ruff_cache/
8
+ .pytest_cache/
9
+ *.egg-info/
10
+ dist/
11
+ build/
.python-version ADDED
@@ -0,0 +1 @@
 
 
1
+ 3.12
Dockerfile ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.12-slim
2
+
3
+ ENV PYTHONUNBUFFERED=1 \
4
+ UV_COMPILE_BYTECODE=1 \
5
+ UV_LINK_MODE=copy
6
+
7
+ RUN apt-get update && apt-get install -y --no-install-recommends \
8
+ build-essential \
9
+ cmake \
10
+ && rm -rf /var/lib/apt/lists/*
11
+
12
+ COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
13
+
14
+ WORKDIR /app
15
+
16
+ COPY pyproject.toml uv.lock .python-version ./
17
+ COPY apps/gradio-space/pyproject.toml apps/gradio-space/
18
+ COPY libs/inference/pyproject.toml libs/inference/
19
+ COPY apps/gradio-space/src apps/gradio-space/src
20
+ COPY libs/inference/src libs/inference/src
21
+
22
+ RUN useradd -m -u 1000 user && \
23
+ uv sync --frozen --no-dev --package gradio-space && \
24
+ chown -R user:user /app
25
+
26
+ USER user
27
+ ENV HOME=/home/user \
28
+ PATH="/app/.venv/bin:$PATH"
29
+
30
+ EXPOSE 7860
31
+
32
+ CMD ["uv", "run", "--package", "gradio-space", "python", "-m", "gradio_space.app"]
README.md CHANGED
@@ -1 +1,101 @@
1
- # small-model-hackathon
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Small Model Hackathon
3
+ emoji: 🦙
4
+ colorFrom: blue
5
+ colorTo: green
6
+ sdk: docker
7
+ app_port: 7860
8
+ pinned: false
9
+ license: apache-2.0
10
+ ---
11
+
12
+ # Small Model Hackathon
13
+
14
+ Gradio chat Space for the [Build Small Hackathon](https://huggingface.co/build-small-hackathon). Runs local inference with **llama.cpp** (GGUF) by default; optional **transformers** backend via env.
15
+
16
+ ## Prerequisites
17
+
18
+ - [uv](https://docs.astral.sh/uv/)
19
+ - Python 3.12
20
+
21
+ ## Quick start
22
+
23
+ ```bash
24
+ uv sync --all-packages
25
+ cp .env.example .env # optional: edit model settings
26
+
27
+ # Download GGUF for offline dev (optional)
28
+ uv run python scripts/download_model.py
29
+
30
+ # Run Gradio locally
31
+ uv run --package gradio-space python -m gradio_space.app
32
+ ```
33
+
34
+ Open http://localhost:7860. The model downloads from Hugging Face Hub on the first chat message (or set `MODEL_PATH` to a local GGUF).
35
+
36
+ ## Environment variables
37
+
38
+ | Variable | Default | Description |
39
+ |----------|---------|-------------|
40
+ | `INFERENCE_BACKEND` | `llama_cpp` | `llama_cpp` or `transformers` |
41
+ | `MODEL_REPO` | `Qwen/Qwen2.5-3B-Instruct-GGUF` | Hub repo for GGUF |
42
+ | `MODEL_FILE` | `qwen2.5-3b-instruct-q4_k_m.gguf` | GGUF filename |
43
+ | `MODEL_PATH` | — | Local GGUF path (skips Hub download) |
44
+ | `N_CTX` | `4096` | Context window |
45
+ | `N_GPU_LAYERS` | `0` | GPU layers for llama.cpp (0 = CPU) |
46
+ | `MODEL_ID` | `Qwen/Qwen2.5-3B-Instruct` | Used when `INFERENCE_BACKEND=transformers` |
47
+
48
+ See [`.env.example`](.env.example) for a full template.
49
+
50
+ ## Monorepo layout
51
+
52
+ ```text
53
+ apps/gradio-space/ # Gradio UI (HF Space entrypoint)
54
+ libs/inference/ # Swappable inference backends
55
+ scripts/ # Dev utilities
56
+ ```
57
+
58
+ ### Common commands
59
+
60
+ ```bash
61
+ uv add --package gradio-space <package>
62
+ uv add --package inference <package>
63
+ uv run --package gradio-space python -m gradio_space.app
64
+ uv run python -c "from inference.factory import get_backend"
65
+ ```
66
+
67
+ ## Hugging Face Space deployment
68
+
69
+ 1. Create a Space under [build-small-hackathon](https://huggingface.co/build-small-hackathon) with **Docker** SDK.
70
+ 2. Link this repository (root `Dockerfile` + root `README.md` YAML above).
71
+ 3. Hardware: start with **CPU basic**; upgrade to GPU if you set `N_GPU_LAYERS > 0`.
72
+ 4. Add Space secrets: `MODEL_REPO`, `MODEL_FILE`, `N_CTX`, `N_GPU_LAYERS`.
73
+
74
+ ```bash
75
+ # Optional local Docker smoke test
76
+ docker build -t hackathon-space .
77
+ docker run --rm -p 7860:7860 -e MODEL_REPO=Qwen/Qwen2.5-3B-Instruct-GGUF hackathon-space
78
+ ```
79
+
80
+ ## Hackathon checklist
81
+
82
+ - [ ] Choose a track (Backyard AI or Thousand Token Wood)
83
+ - [ ] Space live under build-small-hackathon
84
+ - [ ] Demo video recorded
85
+ - [ ] Social post published
86
+ - [ ] Submission locked in by **June 15, 2026**
87
+
88
+ ### Badge targets
89
+
90
+ - **Off-the-Grid** — local llama.cpp inference (default setup)
91
+ - **Llama Champion** — llama.cpp + GGUF model
92
+ - **Off-Brand** — custom UI via `gr.Server` (Phase 2)
93
+ - **Sharing is Caring** — agent traces dataset (Phase 2)
94
+
95
+ ## Transformers backend (optional)
96
+
97
+ ```bash
98
+ uv sync --package inference --extra transformers
99
+ INFERENCE_BACKEND=transformers MODEL_ID=Qwen/Qwen2.5-3B-Instruct \
100
+ uv run --package gradio-space python -m gradio_space.app
101
+ ```
pyproject.toml ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ name = "small-model-hackathon"
3
+ version = "0.1.0"
4
+ description = "Build Small Hackathon — Gradio Space with local llama.cpp inference"
5
+ readme = "README.md"
6
+ requires-python = ">=3.12"
7
+ dependencies = [
8
+ "gradio-space",
9
+ "inference",
10
+ ]
11
+
12
+ [dependency-groups]
13
+ dev = [
14
+ "ruff>=0.9.0",
15
+ "pytest>=8.0.0",
16
+ ]
17
+
18
+ [tool.uv.workspace]
19
+ members = [
20
+ "apps/*",
21
+ "libs/*",
22
+ ]
23
+
24
+ [tool.uv.sources]
25
+ gradio-space = { workspace = true }
26
+ inference = { workspace = true }
scripts/download_model.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """Download the configured GGUF model from Hugging Face Hub for offline dev."""
3
+
4
+ from __future__ import annotations
5
+
6
+ import argparse
7
+ import os
8
+ from pathlib import Path
9
+
10
+ from huggingface_hub import hf_hub_download
11
+
12
+
13
+ def main() -> None:
14
+ parser = argparse.ArgumentParser(description=__doc__)
15
+ parser.add_argument(
16
+ "--repo",
17
+ default=os.environ.get("MODEL_REPO", "Qwen/Qwen2.5-3B-Instruct-GGUF"),
18
+ help="Hugging Face repo containing the GGUF file",
19
+ )
20
+ parser.add_argument(
21
+ "--file",
22
+ default=os.environ.get("MODEL_FILE", "qwen2.5-3b-instruct-q4_k_m.gguf"),
23
+ help="GGUF filename inside the repo",
24
+ )
25
+ parser.add_argument(
26
+ "--output-dir",
27
+ type=Path,
28
+ default=Path("models"),
29
+ help="Directory to copy/symlink the downloaded model into",
30
+ )
31
+ args = parser.parse_args()
32
+
33
+ args.output_dir.mkdir(parents=True, exist_ok=True)
34
+
35
+ path = hf_hub_download(
36
+ repo_id=args.repo,
37
+ filename=args.file,
38
+ local_dir=args.output_dir,
39
+ local_dir_use_symlinks=False,
40
+ )
41
+ print(f"Model ready at: {path}")
42
+ print(f"Set MODEL_PATH={path} to use this file directly.")
43
+
44
+
45
+ if __name__ == "__main__":
46
+ main()