# ollama-server / entrypoint.sh
# minor fix ollama (commit 6806c38)
#!/bin/bash
set -euo pipefail
# Logging helpers
# log: print a timestamped informational message on stdout.
log() {
  printf '[%s] %s\n' "$(date +'%H:%M:%S')" "$*"
}
error() { echo "[$(date +'%H:%M:%S')] ERROR: $*" >&2; }
# System resource check: inspect host memory/CPU and export tuning
# variables consumed by Ollama and the math libraries.
# Globals written: OLLAMA_MAX_QUEUE (when <6000 MB available),
#                  OMP_NUM_THREADS / MKL_NUM_THREADS (when <=2 cores).
check_system() {
  local mem_mb cpu_count
  # Split declaration from assignment so a pipeline failure isn't
  # masked by `local`'s own exit status (SC2155).
  # NR==2 $7 of `free -m` is the "available" column on procps-ng.
  mem_mb=$(free -m | awk 'NR==2{print $7}')
  cpu_count=$(nproc)
  log "Available Memory: ${mem_mb}MB, CPU Cores: ${cpu_count}"
  # Guard the numeric tests: an empty/garbled value would make
  # `[ "" -lt 6000 ]` error out and kill the script under `set -e`.
  if [[ "$mem_mb" =~ ^[0-9]+$ ]] && [ "$mem_mb" -lt 6000 ]; then
    export OLLAMA_MAX_QUEUE=2
    log "Low memory detected - reduced queue size to 2"
  fi
  if [[ "$cpu_count" =~ ^[0-9]+$ ]] && [ "$cpu_count" -le 2 ]; then
    export OMP_NUM_THREADS=2
    export MKL_NUM_THREADS=2
    log "Limited CPU cores - adjusted thread count"
  fi
}
# Wait for service readiness: launch `ollama serve` in the background
# and poll until port 7860 accepts TCP connections.
# Returns: 0 once the port is open; 1 on timeout or if the server
#          process dies first (the stray process is killed on timeout).
wait_for_service() {
  log "Starting Ollama server..."
  ollama serve &
  local pid=$!
  local attempt
  # 30 attempts x 2s sleep => up to 60 seconds.
  for attempt in {1..30}; do
    # Bail out early instead of polling a corpse for a full minute.
    if ! kill -0 "$pid" 2>/dev/null; then
      error "Ollama server process exited prematurely"
      return 1
    fi
    if nc -z localhost 7860 2>/dev/null; then
      log "βœ“ Ollama service ready on port 7860"
      return 0
    fi
    sleep 2
  done
  error "Service failed to start within 60 seconds"
  kill "$pid" 2>/dev/null || true
  return 1
}
# Model management: pre-pull the model named by $PRELOAD_MODEL, warm it
# up, and on failure try small quantized fallbacks, recording the one
# that worked in DEFAULT_MODEL. Always returns 0 (preloading is
# best-effort; models can still load on demand).
setup_model() {
  local model="${PRELOAD_MODEL:-}"
  if [ -z "$model" ]; then
    log "No model preloading specified (set PRELOAD_MODEL env var)"
    return 0
  fi
  log "Attempting to preload model: $model"
  if timeout 300 ollama pull "$model" 2>/dev/null; then
    log "βœ“ Model $model loaded successfully"
    # Fire a throwaway prompt so the first real request is fast.
    echo "test" | timeout 15 ollama run "$model" >/dev/null 2>&1 || true
    return 0
  fi
  log "⚠ Failed to preload $model - will load on demand"
  local fallback
  for fallback in "gemma:2b-instruct-q4_0" "phi:2.7b-chat-v0.2-q4_0"; do
    log "Trying fallback: $fallback"
    if timeout 180 ollama pull "$fallback" 2>/dev/null; then
      log "βœ“ Fallback model $fallback loaded"
      export DEFAULT_MODEL="$fallback"
      return 0
    fi
  done
  return 0
}
# Signal handling
# cleanup: terminate the background Ollama server and exit 0 so the
# container shuts down cleanly on SIGTERM (docker stop) or SIGINT.
cleanup() {
log "Shutting down gracefully..."
# Best-effort kill; `|| true` keeps `set -e` from aborting if no
# matching process exists.
pkill -f "ollama serve" 2>/dev/null || true
exit 0
}
# Registered after definition; the handler runs in the main shell.
trap cleanup SIGTERM SIGINT
# Main execution: report configuration, tune for the host, start the
# server, preload models, then block so the container stays alive.
# Exits 1 if the server never becomes ready.
main() {
  log "Starting Ollama with CPU optimizations"
  # ${VAR:-unset}: under `set -u` (top of file) a bare $OLLAMA_* would
  # abort the script at startup whenever the variable is not exported.
  log "Config: PARALLEL=${OLLAMA_NUM_PARALLEL:-unset}, QUEUE=${OLLAMA_MAX_QUEUE:-unset}, KEEP_ALIVE=${OLLAMA_KEEP_ALIVE:-unset}"
  check_system
  if wait_for_service; then
    setup_model
    log "πŸš€ Ollama ready at http://localhost:7860"
    log "Send requests to /api/generate or /api/chat endpoints"
    # Block on the background `ollama serve` job; the SIGTERM/SIGINT
    # trap interrupts this wait for graceful shutdown.
    wait
  else
    error "Failed to initialize Ollama"
    exit 1
  fi
}
main "$@"