Upload 3 files
Browse files- Dockerfile (2) +37 -0
- README (2).md +48 -0
- entrypoint.sh +45 -0
Dockerfile (2)
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM ubuntu:22.04

# Runtime dependencies: curl for the installer and health checks,
# ca-certificates for TLS model downloads.
RUN apt-get update && apt-get install -y --no-install-recommends \
    curl \
    ca-certificates \
    && rm -rf /var/lib/apt/lists/*

# Install Ollama (official installer; ollama.com is the canonical domain).
RUN curl -fsSL https://ollama.com/install.sh | sh

# Create non-root user (HF Spaces requires UID 1000)
RUN useradd -m -u 1000 user

ENV HOME=/home/user \
    PATH="/home/user/.local/bin:$PATH" \
    OLLAMA_HOST=0.0.0.0:7860 \
    OLLAMA_MODELS=/home/user/.ollama/models

WORKDIR $HOME/app

COPY --chown=user:user entrypoint.sh .
RUN chmod +x entrypoint.sh

# Pre-pull model at build time so first request is instant.
# HF Spaces build layer caches this.
# Runs as root, then hands ownership of the model store to the runtime
# user — otherwise the files pulled here would be root-owned and the
# non-root runtime user could not read them.
# Poll /api/version instead of a fixed sleep: a slow builder would
# otherwise race the server start and fail the pull.
RUN set -e; \
    ollama serve & \
    server_pid=$!; \
    i=0; \
    until curl -fsS http://127.0.0.1:7860/api/version >/dev/null 2>&1; do \
        i=$((i + 1)); \
        [ "$i" -ge 30 ] && { echo "ollama did not start during build" >&2; exit 1; }; \
        sleep 1; \
    done; \
    ollama pull granite4:350m; \
    kill "$server_pid" 2>/dev/null || true; \
    wait "$server_pid" 2>/dev/null || true; \
    chown -R user:user /home/user/.ollama

USER user

EXPOSE 7860

CMD ["./entrypoint.sh"]
|
README (2).md
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: Ollama Granite4 350m
|
| 3 |
+
emoji: 🪨
|
| 4 |
+
colorFrom: gray
|
| 5 |
+
colorTo: blue
|
| 6 |
+
sdk: docker
|
| 7 |
+
pinned: false
|
| 8 |
+
app_port: 7860
|
| 9 |
+
---
|
| 10 |
+
|
| 11 |
+
# Ollama — IBM Granite 4.0 350m
|
| 12 |
+
|
| 13 |
+
Serwer Ollama z modelem **IBM Granite 4.0 (350m)** udostępniający REST API kompatybilne z Ollama.
|
| 14 |
+
|
| 15 |
+
## Endpoints
|
| 16 |
+
|
| 17 |
+
| Method | Path | Opis |
|
| 18 |
+
|--------|------|------|
|
| 19 |
+
| `GET` | `/api/version` | Wersja Ollama |
|
| 20 |
+
| `GET` | `/api/tags` | Lista dostępnych modeli |
|
| 21 |
+
| `POST` | `/api/generate` | Generowanie tekstu (streaming) |
|
| 22 |
+
| `POST` | `/api/chat` | Chat completions |
|
| 23 |
+
| `POST` | `/api/embeddings` | Embeddingi |
|
| 24 |
+
|
| 25 |
+
## Przykład użycia
|
| 26 |
+
|
| 27 |
+
```bash
|
| 28 |
+
# Generate
|
| 29 |
+
curl https://<your-space-url>/api/generate \
|
| 30 |
+
-d '{"model":"granite4:350m","prompt":"Hello!","stream":false}'
|
| 31 |
+
|
| 32 |
+
# Chat
|
| 33 |
+
curl https://<your-space-url>/api/chat \
|
| 34 |
+
-d '{
|
| 35 |
+
"model": "granite4:350m",
|
| 36 |
+
"messages": [{"role":"user","content":"Explain quantum computing briefly."}],
|
| 37 |
+
"stream": false
|
| 38 |
+
}'
|
| 39 |
+
```
|
| 40 |
+
|
| 41 |
+
## Model
|
| 42 |
+
|
| 43 |
+
- **Model:** IBM Granite 4.0 — 350M params
|
| 44 |
+
- **Architektura:** Transformer (nie hybrydowy Mamba-2)
|
| 45 |
+
- **Tag Ollama:** `granite4:350m`
|
| 46 |
+
- **Kwantyzacja:** Q4_K_M (domyślna)
|
| 47 |
+
- **Rozmiar:** ~250 MB
|
| 48 |
+
- **Zastosowanie:** instrukcje, Q&A, RAG, klasyfikacja, kod
|
entrypoint.sh
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
# Entrypoint for the Ollama Space: start the server, wait until the API
# answers, make sure the model is present, then block on the server so
# its exit status becomes the container's exit status.
set -euo pipefail

echo "==> Starting Ollama server on port 7860..."
export OLLAMA_HOST=0.0.0.0:7860
export OLLAMA_MODELS=/home/user/.ollama/models

# Start ollama in the background; we `wait` on it at the end.
ollama serve &
OLLAMA_PID=$!

# Forward termination signals so the container stops cleanly.
trap 'kill "$OLLAMA_PID" 2>/dev/null || true' TERM INT

# Wait for ollama to be ready
echo "==> Waiting for Ollama to be ready..."
readonly MAX_RETRIES=30
COUNT=0
until curl -s http://localhost:7860/api/version > /dev/null 2>&1; do
  # Bail out immediately if the server process already died — retrying
  # for the full 60 s against a dead process is pointless.
  if ! kill -0 "$OLLAMA_PID" 2>/dev/null; then
    echo "ERROR: Ollama process exited during startup." >&2
    exit 1
  fi
  COUNT=$((COUNT + 1))
  if [ "$COUNT" -ge "$MAX_RETRIES" ]; then
    echo "ERROR: Ollama did not start in time." >&2
    exit 1
  fi
  echo "    ... attempt $COUNT/$MAX_RETRIES"
  sleep 2
done

echo "==> Ollama is ready!"

# Pull model if not cached (fallback in case build layer failed)
if ! ollama list | grep -q "granite4"; then
  echo "==> Pulling granite4:350m..."
  ollama pull granite4:350m
fi

echo "==> Model available:"
ollama list

echo "==> Ollama API running at http://0.0.0.0:7860"
echo "==> Endpoints:"
echo "    POST /api/generate"
echo "    POST /api/chat"
echo "    GET  /api/tags"
echo "    POST /api/embeddings"

# Keep process alive; propagate the server's exit status.
wait "$OLLAMA_PID"
|