#!/bin/bash
set -euo pipefail

# ---------------------------------------------------------------------------
# Container entrypoint: start an Ollama server on port 7860 with CPU-friendly
# tuning, optionally preload a model (PRELOAD_MODEL env var), then keep the
# container alive until SIGTERM/SIGINT.
# ---------------------------------------------------------------------------

# Timestamped logging to stdout.
log() { echo "[$(date +'%H:%M:%S')] $*"; }

# Timestamped error logging to stderr.
error() { echo "[$(date +'%H:%M:%S')] ERROR: $*" >&2; }

# Inspect available memory / CPU cores and export conservative tuning knobs
# for low-resource hosts.
check_system() {
  local mem_mb cpu_count
  # free's "available" column (row 2, field 7). Split from the declaration so
  # a pipeline failure isn't masked by `local`; default to 0 so the numeric
  # comparison below cannot be an integer-expression error under set -e.
  mem_mb=$(free -m | awk 'NR==2{print $7}')
  mem_mb=${mem_mb:-0}
  cpu_count=$(nproc)
  log "Available Memory: ${mem_mb}MB, CPU Cores: ${cpu_count}"

  # Adjust threading based on resources.
  if [ "$mem_mb" -lt 6000 ]; then
    export OLLAMA_MAX_QUEUE=2
    log "Low memory detected - reduced queue size to 2"
  fi
  if [ "$cpu_count" -le 2 ]; then
    export OMP_NUM_THREADS=2
    export MKL_NUM_THREADS=2
    log "Limited CPU cores - adjusted thread count"
  fi
}

# Launch `ollama serve` in the background and poll until the port answers.
# Returns 0 when ready; on timeout kills the server and returns 1.
# NOTE(review): polls port 7860 (HF-Spaces style), not Ollama's default
# 11434 — assumes OLLAMA_HOST is configured elsewhere to match; confirm.
wait_for_service() {
  log "Starting Ollama server..."
  ollama serve &
  local pid=$!

  # Wait up to 60 seconds (30 polls x 2s) for the service.
  local i
  for i in {1..30}; do
    if nc -z localhost 7860 2>/dev/null; then
      log "✓ Ollama service ready on port 7860"
      return 0
    fi
    sleep 2
  done
  error "Service failed to start within 60 seconds"
  kill "$pid" 2>/dev/null || true
  return 1
}

# Pull PRELOAD_MODEL (if set) with a 5-minute timeout and warm it up; on
# failure, try a list of small quantized fallbacks and export DEFAULT_MODEL
# to whichever one succeeded. Never fails the script: models load on demand.
setup_model() {
  local model="${PRELOAD_MODEL:-}"
  if [ -z "$model" ]; then
    log "No model preloading specified (set PRELOAD_MODEL env var)"
    return 0
  fi

  log "Attempting to preload model: $model"
  if timeout 300 ollama pull "$model" 2>/dev/null; then
    log "✓ Model $model loaded successfully"
    # Quick warmup so the first real request isn't slow; failure is non-fatal.
    echo "test" | timeout 15 ollama run "$model" >/dev/null 2>&1 || true
  else
    log "⚠ Failed to preload $model - will load on demand"
    # Try lightweight alternatives.
    local fallback
    for fallback in "gemma:2b-instruct-q4_0" "phi:2.7b-chat-v0.2-q4_0"; do
      log "Trying fallback: $fallback"
      if timeout 180 ollama pull "$fallback" 2>/dev/null; then
        log "✓ Fallback model $fallback loaded"
        export DEFAULT_MODEL="$fallback"
        break
      fi
    done
  fi
}

# Graceful shutdown on SIGTERM/SIGINT: stop the server and exit cleanly.
cleanup() {
  log "Shutting down gracefully..."
  pkill -f "ollama serve" 2>/dev/null || true
  exit 0
}
trap cleanup SIGTERM SIGINT

main() {
  log "Starting Ollama with CPU optimizations"
  # BUG FIX: OLLAMA_NUM_PARALLEL / OLLAMA_MAX_QUEUE / OLLAMA_KEEP_ALIVE may be
  # unset; referencing them bare aborted the script under `set -u`. Use
  # defaulted expansions so the config line is purely informational.
  log "Config: PARALLEL=${OLLAMA_NUM_PARALLEL:-unset}, QUEUE=${OLLAMA_MAX_QUEUE:-unset}, KEEP_ALIVE=${OLLAMA_KEEP_ALIVE:-unset}"

  check_system

  if wait_for_service; then
    setup_model
    log "🚀 Ollama ready at http://localhost:7860"
    log "Send requests to /api/generate or /api/chat endpoints"
    # Keep the container alive while the background server runs.
    wait
  else
    error "Failed to initialize Ollama"
    exit 1
  fi
}

main "$@"