cometapii committed
Commit 1f2477d · verified · 1 Parent(s): 205db80

Upload 3 files

Files changed (3)
  1. Dockerfile (2) +37 -0
  2. README (2).md +48 -0
  3. entrypoint.sh +45 -0
Dockerfile (2) ADDED
@@ -0,0 +1,37 @@
+ FROM ubuntu:22.04
+
+ # Dependencies
+ RUN apt-get update && apt-get install -y \
+     curl \
+     ca-certificates \
+     && rm -rf /var/lib/apt/lists/*
+
+ # Install Ollama
+ RUN curl -fsSL https://ollama.ai/install.sh | sh
+
+ # Create non-root user (HF Spaces requires UID 1000)
+ RUN useradd -m -u 1000 user
+ USER user
+
+ ENV HOME=/home/user \
+     PATH="/home/user/.local/bin:$PATH" \
+     OLLAMA_HOST=0.0.0.0:7860 \
+     OLLAMA_MODELS=/home/user/.ollama/models
+
+ WORKDIR $HOME/app
+
+ COPY --chown=user entrypoint.sh .
+ RUN chmod +x entrypoint.sh
+
+ # Pre-pull the model at build time so the first request is instant;
+ # the HF Spaces build cache keeps this layer. Re-own ~/.ollama afterwards,
+ # since root creates it here and the runtime user must write to it.
+ USER root
+ RUN ollama serve & \
+     sleep 5 && \
+     ollama pull granite4:350m && \
+     pkill ollama || true && \
+     chown -R user:user /home/user/.ollama
+ USER user
+
+ EXPOSE 7860
+
+ CMD ["./entrypoint.sh"]
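
To try the image outside of Spaces, a minimal local sketch (the image name `ollama-granite` is an arbitrary choice, not part of this repo):

```bash
# Build; the pre-pull RUN step downloads ~250 MB into the image
docker build -t ollama-granite .

# Run, exposing the Spaces port locally
docker run --rm -p 7860:7860 ollama-granite

# In another shell, confirm the server answers
curl http://localhost:7860/api/version
```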
README (2).md ADDED
@@ -0,0 +1,48 @@
+ ---
+ title: Ollama Granite4 350m
+ emoji: 🪨
+ colorFrom: gray
+ colorTo: blue
+ sdk: docker
+ pinned: false
+ app_port: 7860
+ ---
+
+ # Ollama — IBM Granite 4.0 350m
+
+ An Ollama server hosting the **IBM Granite 4.0 (350m)** model and exposing the Ollama-compatible REST API.
+
+ ## Endpoints
+
+ | Method | Path | Description |
+ |--------|------|-------------|
+ | `GET` | `/api/version` | Ollama version |
+ | `GET` | `/api/tags` | List available models |
+ | `POST` | `/api/generate` | Text generation (streaming) |
+ | `POST` | `/api/chat` | Chat completions |
+ | `POST` | `/api/embeddings` | Embeddings |
+
+ ## Usage example
+
+ ```bash
+ # Generate
+ curl https://<your-space-url>/api/generate \
+   -d '{"model":"granite4:350m","prompt":"Hello!","stream":false}'
+
+ # Chat
+ curl https://<your-space-url>/api/chat \
+   -d '{
+     "model": "granite4:350m",
+     "messages": [{"role":"user","content":"Explain quantum computing briefly."}],
+     "stream": false
+   }'
+ ```
+
+ ## Model
+
+ - **Model:** IBM Granite 4.0 — 350M params
+ - **Architecture:** Transformer (not the hybrid Mamba-2 variant)
+ - **Ollama tag:** `granite4:350m`
+ - **Quantization:** Q4_K_M (default)
+ - **Size:** ~250 MB
+ - **Use cases:** instruction following, Q&A, RAG, classification, code
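
The endpoints table above also lists `/api/embeddings`, which the README's examples skip; a matching call would look like this (standard Ollama embeddings API; the prompt text is illustrative):

```bash
# Embeddings: the response carries an "embedding" array of floats
curl https://<your-space-url>/api/embeddings \
  -d '{"model":"granite4:350m","prompt":"Quantum computing in one sentence."}'
```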
entrypoint.sh ADDED
@@ -0,0 +1,45 @@
+ #!/bin/bash
+ set -e
+
+ echo "==> Starting Ollama server on port 7860..."
+ export OLLAMA_HOST=0.0.0.0:7860
+ export OLLAMA_MODELS=/home/user/.ollama/models
+
+ # Start Ollama in the background; keep its PID so we can wait on it
+ ollama serve &
+ OLLAMA_PID=$!
+
+ # Wait for Ollama to be ready
+ echo "==> Waiting for Ollama to be ready..."
+ MAX_RETRIES=30
+ COUNT=0
+ until curl -s http://localhost:7860/api/version > /dev/null 2>&1; do
+   COUNT=$((COUNT + 1))
+   if [ $COUNT -ge $MAX_RETRIES ]; then
+     echo "ERROR: Ollama did not start in time."
+     exit 1
+   fi
+   echo " ... attempt $COUNT/$MAX_RETRIES"
+   sleep 2
+ done
+
+ echo "==> Ollama is ready!"
+
+ # Pull the model if it is not cached (fallback in case the build layer failed)
+ if ! ollama list | grep -q "granite4"; then
+   echo "==> Pulling granite4:350m..."
+   ollama pull granite4:350m
+ fi
+
+ echo "==> Model available:"
+ ollama list
+
+ echo "==> Ollama API running at http://0.0.0.0:7860"
+ echo "==> Endpoints:"
+ echo "   POST /api/generate"
+ echo "   POST /api/chat"
+ echo "   GET  /api/tags"
+ echo "   POST /api/embeddings"
+
+ # Keep the container alive for as long as the server runs
+ wait $OLLAMA_PID
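
Once `entrypoint.sh` reports ready, a quick smoke test of the running server (a sketch; `BASE` assumes a local run, swap in the Space URL as needed):

```bash
BASE=http://localhost:7860   # or https://<your-space-url>

curl -s "$BASE/api/tags"     # granite4:350m should be listed
curl -s "$BASE/api/generate" \
  -d '{"model":"granite4:350m","prompt":"ping","stream":false}'
```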