cometapii commited on
Commit
84aa332
·
verified ·
1 Parent(s): 010ee78

Upload 4 files

Browse files
Files changed (4) hide show
  1. Dockerfile +44 -0
  2. README.md +38 -5
  3. entrypoint.sh +41 -0
  4. proxy.py +69 -0
Dockerfile ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM ubuntu:22.04

# Base deps: curl/ca-certificates for the Ollama installer and health polling,
# zstd for model blobs, python3 + pip for the FastAPI proxy.
RUN apt-get update && apt-get install -y \
    curl \
    ca-certificates \
    zstd \
    python3 \
    python3-pip \
    && rm -rf /var/lib/apt/lists/*

# Install Ollama
RUN curl -fsSL https://ollama.ai/install.sh | sh

# Install Python deps system-wide.
# NOTE(review): requirements.txt is not one of the files in this commit —
# confirm it exists in the repo (needs at least fastapi, uvicorn, httpx).
COPY requirements.txt /tmp/requirements.txt
RUN pip3 install --no-cache-dir -r /tmp/requirements.txt

# Create non-root user (HF Spaces requires UID 1000)
RUN useradd -m -u 1000 user

ENV HOME=/home/user \
    PATH="/home/user/.local/bin:$PATH" \
    OLLAMA_HOST=127.0.0.1:11434 \
    OLLAMA_MODELS=/home/user/.ollama/models

WORKDIR /home/user/app

COPY --chown=user entrypoint.sh .
COPY --chown=user proxy.py .
RUN chmod +x entrypoint.sh

# Pre-pull models at build time.
# - Poll the server until ready instead of a fixed `sleep 8` (not guaranteed
#   to be long enough on slow builders).
# - chown the model store afterwards: it is written as root here, but the
#   runtime user (UID 1000) must be able to write it for the entrypoint's
#   fallback pulls to work.
RUN ollama serve & \
    for i in $(seq 1 30); do \
        curl -s http://127.0.0.1:11434/api/version > /dev/null && break; \
        sleep 2; \
    done && \
    ollama pull deepseek-r1:latest && \
    ollama pull qwen3-vl:latest && \
    chown -R user:user /home/user/.ollama && \
    { pkill ollama || true; }

USER user

EXPOSE 7860

CMD ["./entrypoint.sh"]
README.md CHANGED
@@ -1,10 +1,43 @@
1
  ---
2
- title: Ollamapi
3
- emoji: 👀
4
- colorFrom: purple
5
- colorTo: green
6
  sdk: docker
7
  pinned: false
 
8
  ---
9
 
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: Ollama DeepSeek-R1 + Qwen3-VL
3
+ emoji: 🤖
4
+ colorFrom: blue
5
+ colorTo: purple
6
  sdk: docker
7
  pinned: false
8
+ app_port: 7860
9
  ---
10
 
11
+ # Ollama DeepSeek-R1 + Qwen3-VL
12
+
13
+ Serwer Ollama z dwoma modelami, chroniony kluczem API.
14
+
15
+ | Model | Typ | Rozmiar |
16
+ |-------|-----|---------|
17
+ | `deepseek-r1:latest` | LLM reasoning | ~4.7GB |
18
+ | `qwen3-vl:latest` | Vision-Language | ~5.4GB |
19
+
20
+ ## Autoryzacja
21
+
22
+ Każde żądanie wymaga nagłówka:
23
+ ```
24
+ Authorization: Bearer connectkey
25
+ ```
26
+
27
+ ## Endpoints
28
+
29
+ | Method | Path | Opis |
30
+ |--------|------|------|
31
+ | `GET` | `/api/version` | Wersja Ollama |
32
+ | `GET` | `/api/tags` | Lista modeli |
33
+ | `POST` | `/api/generate` | Generowanie (stream) |
34
+ | `POST` | `/api/chat` | Chat (stream) |
35
+ | `POST` | `/api/embeddings` | Embeddingi |
36
+
37
+ ## Przykład
38
+
39
+ ```bash
+ curl https://<user>-<space>.hf.space/api/chat \
+   -H "Authorization: Bearer connectkey" \
+   -H "Content-Type: application/json" \
+   -d '{"model":"deepseek-r1:latest","messages":[{"role":"user","content":"Hello!"}]}'
+ ```
entrypoint.sh ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash
# Boot sequence: start the Ollama daemon, wait until it answers, make sure
# both models are present (fallback if the build-time pull was lost), then
# replace this shell with the FastAPI proxy.
set -e

echo "==> Starting Ollama (internal on 11434)..."
export OLLAMA_HOST=127.0.0.1:11434
export OLLAMA_MODELS=/home/user/.ollama/models

ollama serve &

# Poll the version endpoint until the daemon is reachable (max ~60s).
echo "==> Waiting for Ollama..."
MAX_RETRIES=30
attempt=0
while ! curl -s http://127.0.0.1:11434/api/version > /dev/null 2>&1; do
  attempt=$((attempt + 1))
  if [ "$attempt" -ge "$MAX_RETRIES" ]; then
    echo "ERROR: Ollama did not start."
    exit 1
  fi
  echo "    ... attempt $attempt/$MAX_RETRIES"
  sleep 2
done

echo "==> Ollama ready!"

# Pull a model only when `ollama list` does not already show it.
# $1 = grep pattern, $2 = full model tag to pull.
ensure_model() {
  if ! ollama list | grep -q "$1"; then
    echo "==> Pulling $2..."
    ollama pull "$2"
  fi
}

ensure_model "deepseek-r1" "deepseek-r1:latest"
ensure_model "qwen3-vl" "qwen3-vl:latest"

echo "==> Models available:"
ollama list

echo "==> Starting FastAPI proxy on 0.0.0.0:7860 (API key: connectkey)..."
# exec so uvicorn becomes PID of this process and receives signals directly.
exec uvicorn proxy:app --host 0.0.0.0 --port 7860
proxy.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import hmac
import json
import os

import httpx
from fastapi import FastAPI, Request, HTTPException, Depends
from fastapi.responses import StreamingResponse
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
6
+
7
app = FastAPI()
security = HTTPBearer()

# Bearer token required on every endpoint. Overridable via the API_KEY env
# var; the original hard-coded value is kept as the default so existing
# clients keep working.
API_KEY = os.environ.get("API_KEY", "connectkey")

# Internal Ollama server (started by entrypoint.sh, bound to localhost only).
OLLAMA_BASE = "http://127.0.0.1:11434"

# Models expected to be available (pre-pulled at build time or by entrypoint).
MODELS = ["deepseek-r1:latest", "qwen3-vl:latest"]
14
+
15
+
16
def verify_key(credentials: HTTPAuthorizationCredentials = Depends(security)):
    """FastAPI dependency: validate the Bearer token against API_KEY.

    Returns the token on success; raises HTTPException(401) on mismatch.
    """
    # hmac.compare_digest is constant-time, closing the timing side channel
    # a plain `!=` comparison leaves open on an authentication check.
    if not hmac.compare_digest(credentials.credentials, API_KEY):
        raise HTTPException(status_code=401, detail="Invalid API key")
    return credentials.credentials
20
+
21
+
22
@app.get("/api/version")
async def version(key: str = Depends(verify_key)):
    """Authenticated passthrough to Ollama's /api/version."""
    url = OLLAMA_BASE + "/api/version"
    async with httpx.AsyncClient() as client:
        resp = await client.get(url)
        return resp.json()
27
+
28
+
29
@app.get("/api/tags")
async def tags(key: str = Depends(verify_key)):
    """Authenticated passthrough to Ollama's /api/tags (installed models)."""
    url = OLLAMA_BASE + "/api/tags"
    async with httpx.AsyncClient() as client:
        resp = await client.get(url)
        return resp.json()
34
+
35
+
36
async def _stream(url: str, body: dict):
    """Async generator: POST *body* to *url* and yield raw response bytes.

    Forces Ollama streaming mode so output arrives as NDJSON chunks.
    No timeout: generation can legitimately run for a long time.
    """
    body["stream"] = True  # NOTE: mutates the caller's dict in place
    async with httpx.AsyncClient(timeout=None) as client:
        async with client.stream("POST", url, json=body) as upstream:
            async for piece in upstream.aiter_bytes():
                yield piece
42
+
43
+
44
@app.post("/api/generate")
async def generate(request: Request, key: str = Depends(verify_key)):
    """Streaming proxy for Ollama /api/generate; responds with NDJSON chunks."""
    try:
        body = await request.json()
    except (json.JSONDecodeError, UnicodeDecodeError):
        # Malformed/empty JSON is a client error, not a 500.
        raise HTTPException(status_code=400, detail="Invalid JSON body")
    # _stream() already forces body["stream"] = True; setting it here too
    # was redundant.
    return StreamingResponse(
        _stream(f"{OLLAMA_BASE}/api/generate", body),
        media_type="application/x-ndjson",
    )
52
+
53
+
54
@app.post("/api/chat")
async def chat(request: Request, key: str = Depends(verify_key)):
    """Streaming proxy for Ollama /api/chat; responds with NDJSON chunks."""
    try:
        body = await request.json()
    except (json.JSONDecodeError, UnicodeDecodeError):
        # Malformed/empty JSON is a client error, not a 500.
        raise HTTPException(status_code=400, detail="Invalid JSON body")
    # _stream() already forces body["stream"] = True; setting it here too
    # was redundant.
    return StreamingResponse(
        _stream(f"{OLLAMA_BASE}/api/chat", body),
        media_type="application/x-ndjson",
    )
62
+
63
+
64
@app.post("/api/embeddings")
async def embeddings(request: Request, key: str = Depends(verify_key)):
    """Non-streaming proxy for Ollama /api/embeddings."""
    payload = await request.json()
    url = f"{OLLAMA_BASE}/api/embeddings"
    # No timeout: embedding large inputs can exceed httpx's default 5s.
    async with httpx.AsyncClient(timeout=None) as client:
        resp = await client.post(url, json=payload)
        return resp.json()