Spaces:
Sleeping
Sleeping
File size: 2,223 Bytes
24eb4d9 e689824 12101fc e689824 24eb4d9 c86442b 24eb4d9 e689824 24eb4d9 4882a5b c86442b 4882a5b 65945db 24eb4d9 c86442b 24eb4d9 e689824 12101fc e689824 12101fc e689824 12101fc e689824 12101fc e689824 12101fc 24eb4d9 12101fc c86442b e689824 12101fc 65945db 12101fc c86442b 7ace3bf |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 |
#!/usr/bin/env bash
# Startup script: resolves the model configuration and starts a local Ollama
# server in the background. The rest of the script waits for the server,
# makes sure the configured models are available, and launches the app.
#
# Environment (all optional):
#   MODEL_NAME     base model tag            (default: qwen2.5-coder:3b)
#   TRIAGE_MODEL   model tag                 (default: $MODEL_NAME)
#   ACTIONS_MODEL  model tag                 (default: $MODEL_NAME)
#   WRITER_MODEL   model tag                 (default: $MODEL_NAME)
#   OLLAMA_URL     base URL of the Ollama API (default: http://127.0.0.1:11434)
set -euo pipefail

MODEL_NAME="${MODEL_NAME:-qwen2.5-coder:3b}"
TRIAGE_MODEL="${TRIAGE_MODEL:-$MODEL_NAME}"
ACTIONS_MODEL="${ACTIONS_MODEL:-$MODEL_NAME}"
WRITER_MODEL="${WRITER_MODEL:-$MODEL_NAME}"
OLLAMA_URL="${OLLAMA_URL:-http://127.0.0.1:11434}"

# Exported so child processes (the embedded Python snippets and the app)
# inherit the resolved values.
export OLLAMA_URL MODEL_NAME TRIAGE_MODEL ACTIONS_MODEL WRITER_MODEL

echo "[start.sh] Starting Ollama..."
# Server output goes to a log file so it does not interleave with this script's.
ollama serve > /tmp/ollama.log 2>&1 &
OLLAMA_PID=$!

# If any later step fails (set -e aborts the script), stop the background
# server instead of leaving it orphaned. An EXIT trap is not run when the
# shell is replaced by `exec`, so a successful hand-off to the app leaves
# Ollama running.
stop_ollama() {
  # Best effort: the server may already have exited.
  kill "$OLLAMA_PID" 2>/dev/null || true
}
trap stop_ollama EXIT
# Wait until Ollama is ready
# Polls GET $OLLAMA_URL/api/tags up to 180 times (1 s request timeout, 1 s
# pause after each failed attempt). Exits 0 on the first successful response;
# otherwise exits non-zero with "Ollama did not start", which aborts this
# script under `set -e`.
python3 - << 'PY'
import os
import time
import urllib.request

tags_url = os.environ.get("OLLAMA_URL","http://127.0.0.1:11434").rstrip("/") + "/api/tags"

attempts_left = 180
while attempts_left > 0:
    attempts_left -= 1
    try:
        urllib.request.urlopen(tags_url, timeout=1).read()
        print("Ollama is up")
        # SystemExit is not an Exception subclass, so the handler below
        # does not swallow it.
        raise SystemExit(0)
    except Exception:
        # Server not reachable yet; pause before the next probe.
        time.sleep(1)

raise SystemExit("Ollama did not start")
PY
# Unique model list
#######################################
# Populate UNIQ with the distinct arguments, preserving first-seen order.
# Globals:   UNIQ (written)
# Arguments: model names, possibly containing repeats
# Returns:   0
#######################################
build_unique_models() {
  local candidate known seen
  UNIQ=()
  for candidate in "$@"; do
    seen=false
    # The ${UNIQ[@]+...} guard expands to nothing while UNIQ is still empty;
    # a bare "${UNIQ[@]}" aborts with "unbound variable" under `set -u` on
    # bash versions older than 4.4.
    for known in ${UNIQ[@]+"${UNIQ[@]}"}; do
      if [[ "$known" == "$candidate" ]]; then
        seen=true
        break
      fi
    done
    if [[ "$seen" == false ]]; then
      UNIQ+=("$candidate")
    fi
  done
}

# The four configured names often resolve to the same model; collapse them so
# each model is handled once.
MODELS=("$MODEL_NAME" "$TRIAGE_MODEL" "$ACTIONS_MODEL" "$WRITER_MODEL")
build_unique_models "${MODELS[@]}"
echo "[start.sh] Ensuring models exist..."
# For each distinct model: `ollama show` succeeding means it is already
# available locally; otherwise pull it. A failed pull aborts the script
# because of `set -e`.
for model_tag in "${UNIQ[@]}"; do
  echo "[start.sh] model: $model_tag"
  if ollama show "$model_tag" >/dev/null 2>&1; then
    echo "[start.sh] already present"
    continue
  fi
  echo "[start.sh] pulling $model_tag..."
  ollama pull "$model_tag"
done
echo "[start.sh] Warming up models..."
# Sends one short non-streaming "ping" generation request to every distinct
# configured model via POST $OLLAMA_URL/api/generate. The context size comes
# from OLLAMA_CONTEXT_LENGTH (default 4096). Any request failure raises, so
# the snippet exits non-zero and `set -e` aborts the script.
python3 - << 'PY'
import json
import os
import urllib.request

endpoint = os.environ.get("OLLAMA_URL","http://127.0.0.1:11434").rstrip("/") + "/api/generate"
num_ctx = int(os.environ.get("OLLAMA_CONTEXT_LENGTH","4096"))

# Collect the configured model names, dropping unset/empty values and
# duplicates while keeping first-seen order.
role_vars = ("MODEL_NAME","TRIAGE_MODEL","ACTIONS_MODEL","WRITER_MODEL")
configured = [os.environ.get(name) for name in role_vars]
warm_targets = list(dict.fromkeys(tag for tag in configured if tag))

for tag in warm_targets:
    body = json.dumps({
        "model": tag,
        "prompt": "ping",
        "stream": False,
        "options": {"num_ctx": num_ctx, "num_predict": 16},
    }).encode()
    request = urllib.request.Request(
        endpoint,
        data=body,
        headers={"Content-Type":"application/json"},
    )
    # Generous timeout: the request blocks until the model has answered.
    urllib.request.urlopen(request, timeout=1800).read()
    print("Warmup OK:", tag)
PY
echo "[start.sh] Launching app..."
# exec replaces this shell with the app process instead of running it as a
# child, so the app receives signals directly and its exit status becomes the
# script's. The Ollama server started earlier keeps running in the background.
exec python3 /app/app.py