Upload 4 files

- Dockerfile (+44, -0)
- README.md (+38, -5)
- entrypoint.sh (+41, -0)
- proxy.py (+69, -0)
Dockerfile
ADDED

FROM ubuntu:22.04

# System dependencies
RUN apt-get update && apt-get install -y \
    curl \
    ca-certificates \
    zstd \
    python3 \
    python3-pip \
    && rm -rf /var/lib/apt/lists/*

# Install Ollama
RUN curl -fsSL https://ollama.ai/install.sh | sh

# Install Python deps system-wide
COPY requirements.txt /tmp/requirements.txt
RUN pip3 install --no-cache-dir -r /tmp/requirements.txt

# Create non-root user (HF Spaces requires UID 1000)
RUN useradd -m -u 1000 user

ENV HOME=/home/user \
    PATH="/home/user/.local/bin:$PATH" \
    OLLAMA_HOST=127.0.0.1:11434 \
    OLLAMA_MODELS=/home/user/.ollama/models

WORKDIR /home/user/app

COPY --chown=user entrypoint.sh .
COPY --chown=user proxy.py .
RUN chmod +x entrypoint.sh

# Pre-pull models at build time
RUN OLLAMA_HOST=127.0.0.1:11434 ollama serve & \
    sleep 8 && \
    ollama pull deepseek-r1:latest && \
    ollama pull qwen3-vl:latest && \
    pkill ollama || true
# The pull above runs as root; hand ownership to the runtime user,
# otherwise the non-root server cannot write to ~/.ollama at startup
RUN chown -R user:user /home/user/.ollama

USER user

EXPOSE 7860

CMD ["./entrypoint.sh"]
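A note on dependencies: the Dockerfile copies `requirements.txt` into the image, but that file is not among the four files in this upload, so the build will fail without it. A minimal sketch of its likely contents, inferred from the imports in proxy.py and the uvicorn invocation in entrypoint.sh (versions left unpinned as an assumption):

```
fastapi
uvicorn
httpx
```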
README.md
CHANGED

The previously empty frontmatter placeholders (title, emoji, colorFrom, colorTo) are filled in, `app_port: 7860` is added, and a usage section is appended. The new README:

---
title: Ollama DeepSeek-R1 + Qwen3-VL
emoji: 🤖
colorFrom: blue
colorTo: purple
sdk: docker
pinned: false
app_port: 7860
---

# Ollama — DeepSeek-R1 + Qwen3-VL

An Ollama server with two models, protected by an API key.

| Model | Type | Size |
|-------|------|------|
| `deepseek-r1:latest` | Reasoning LLM | ~4.7 GB |
| `qwen3-vl:latest` | Vision-language | ~5.4 GB |

## Authorization

Every request requires the header:
```
Authorization: Bearer connectkey
```

## Endpoints

| Method | Path | Description |
|--------|------|------|
| `GET` | `/api/version` | Ollama version |
| `GET` | `/api/tags` | Model list |
| `POST` | `/api/generate` | Generation (streaming) |
| `POST` | `/api/chat` | Chat (streaming) |
| `POST` | `/api/embeddings` | Embeddings |

## Example

```bash
curl https://<user>-<space>.hf.space/api/chat \
  -H "Authorization: Bearer connectkey" \
  -d '{"model":"deepseek-r1:latest","messages":[{"role":"user","content":"Hello!"}]}'
```
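Since both streaming endpoints return `application/x-ndjson`, a client has to reassemble the chunks itself. A minimal Python sketch of such a client, assuming `httpx` is installed; the base URL is the same placeholder as in the curl example:

```python
import json
import httpx

BASE = "https://<user>-<space>.hf.space"  # placeholder Space URL
HEADERS = {"Authorization": "Bearer connectkey"}

def chat(prompt: str) -> str:
    """Stream /api/chat and concatenate the NDJSON message chunks."""
    body = {
        "model": "deepseek-r1:latest",
        "messages": [{"role": "user", "content": prompt}],
    }
    parts = []
    with httpx.stream("POST", f"{BASE}/api/chat", json=body,
                      headers=HEADERS, timeout=None) as r:
        r.raise_for_status()
        for line in r.iter_lines():
            if not line:
                continue
            chunk = json.loads(line)
            # Each chunk carries a partial assistant message.
            parts.append(chunk.get("message", {}).get("content", ""))
            if chunk.get("done"):
                break
    return "".join(parts)

if __name__ == "__main__":
    print(chat("Hello!"))
```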
entrypoint.sh
ADDED

#!/bin/bash
set -e

echo "==> Starting Ollama (internal on 11434)..."
export OLLAMA_HOST=127.0.0.1:11434
export OLLAMA_MODELS=/home/user/.ollama/models

ollama serve &

# Wait for ollama ready
echo "==> Waiting for Ollama..."
MAX_RETRIES=30
COUNT=0
until curl -s http://127.0.0.1:11434/api/version > /dev/null 2>&1; do
  COUNT=$((COUNT + 1))
  if [ $COUNT -ge $MAX_RETRIES ]; then
    echo "ERROR: Ollama did not start."
    exit 1
  fi
  echo "  ... attempt $COUNT/$MAX_RETRIES"
  sleep 2
done

echo "==> Ollama ready!"

# Pull models if not cached (fallback)
if ! ollama list | grep -q "deepseek-r1"; then
  echo "==> Pulling deepseek-r1:latest..."
  ollama pull deepseek-r1:latest
fi

if ! ollama list | grep -q "qwen3-vl"; then
  echo "==> Pulling qwen3-vl:latest..."
  ollama pull qwen3-vl:latest
fi

echo "==> Models available:"
ollama list

echo "==> Starting FastAPI proxy on 0.0.0.0:7860 (API key: connectkey)..."
exec uvicorn proxy:app --host 0.0.0.0 --port 7860
proxy.py
ADDED

from fastapi import FastAPI, Request, HTTPException, Depends
from fastapi.responses import StreamingResponse
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
import httpx

app = FastAPI()
security = HTTPBearer()

API_KEY = "connectkey"
OLLAMA_BASE = "http://127.0.0.1:11434"

# Models served by this Space (informational).
MODELS = ["deepseek-r1:latest", "qwen3-vl:latest"]


def verify_key(credentials: HTTPAuthorizationCredentials = Depends(security)):
    """Reject any request whose Bearer token does not match API_KEY."""
    if credentials.credentials != API_KEY:
        raise HTTPException(status_code=401, detail="Invalid API key")
    return credentials.credentials


@app.get("/api/version")
async def version(key: str = Depends(verify_key)):
    async with httpx.AsyncClient() as client:
        r = await client.get(f"{OLLAMA_BASE}/api/version")
        return r.json()


@app.get("/api/tags")
async def tags(key: str = Depends(verify_key)):
    async with httpx.AsyncClient() as client:
        r = await client.get(f"{OLLAMA_BASE}/api/tags")
        return r.json()


async def _stream(url: str, body: dict):
    """Proxy a streaming POST to Ollama, yielding raw NDJSON bytes."""
    body["stream"] = True
    async with httpx.AsyncClient(timeout=None) as client:
        async with client.stream("POST", url, json=body) as r:
            async for chunk in r.aiter_bytes():
                yield chunk


@app.post("/api/generate")
async def generate(request: Request, key: str = Depends(verify_key)):
    body = await request.json()
    return StreamingResponse(
        _stream(f"{OLLAMA_BASE}/api/generate", body),
        media_type="application/x-ndjson",
    )


@app.post("/api/chat")
async def chat(request: Request, key: str = Depends(verify_key)):
    body = await request.json()
    return StreamingResponse(
        _stream(f"{OLLAMA_BASE}/api/chat", body),
        media_type="application/x-ndjson",
    )


@app.post("/api/embeddings")
async def embeddings(request: Request, key: str = Depends(verify_key)):
    body = await request.json()
    async with httpx.AsyncClient(timeout=None) as client:
        r = await client.post(f"{OLLAMA_BASE}/api/embeddings", json=body)
        return r.json()
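A quick smoke test for the proxy's auth behavior, as a sketch: it assumes the stack is already running locally (for example via `./entrypoint.sh`). Note that FastAPI's `HTTPBearer` rejects a missing Authorization header on its own, before `verify_key` runs, so the two failure modes can surface different status codes:

```python
import httpx

BASE = "http://127.0.0.1:7860"  # local proxy; adjust for a deployed Space

# Missing header: HTTPBearer rejects before verify_key runs.
r = httpx.get(f"{BASE}/api/version")
assert r.status_code in (401, 403), r.status_code

# Wrong key: verify_key raises 401.
r = httpx.get(f"{BASE}/api/version", headers={"Authorization": "Bearer wrong"})
assert r.status_code == 401, r.status_code

# Correct key: the request is forwarded to Ollama.
r = httpx.get(f"{BASE}/api/version", headers={"Authorization": "Bearer connectkey"})
print(r.status_code, r.json())
```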