#!/bin/bash
set -euo pipefail
# Logging functions
# Print a timestamped informational message to stdout.
log() {
  local stamp
  stamp=$(date +'%H:%M:%S')
  echo "[$stamp] $*"
}
# Print a timestamped error message to stderr.
error() {
  local stamp
  stamp=$(date +'%H:%M:%S')
  echo "[$stamp] ERROR: $*" >&2
}
# System resource check
# Inspect available memory and CPU count, exporting conservative
# queue/thread limits when the host looks resource-constrained.
# Globals written: OLLAMA_MAX_QUEUE, OMP_NUM_THREADS, MKL_NUM_THREADS.
check_system() {
  local mem_mb cpu_count

  # NR==2 $7 is the "available" column of procps-ng `free -m`.
  # Declaration is split from assignment so a failing pipeline is not
  # masked by `local`; default to 0 so the numeric tests below cannot
  # abort the script (set -e/-u) when `free` is missing or reformatted.
  mem_mb=$(free -m 2>/dev/null | awk 'NR==2{print $7}') || mem_mb=""
  mem_mb=${mem_mb:-0}
  cpu_count=$(nproc 2>/dev/null) || cpu_count=1

  log "Available Memory: ${mem_mb}MB, CPU Cores: ${cpu_count}"

  # Below ~6GB, cap the Ollama request queue to reduce OOM risk.
  if (( mem_mb < 6000 )); then
    export OLLAMA_MAX_QUEUE=2
    log "Low memory detected - reduced queue size to 2"
  fi

  # With <=2 cores, pin BLAS/OpenMP pools to avoid oversubscription.
  if (( cpu_count <= 2 )); then
    export OMP_NUM_THREADS=2
    export MKL_NUM_THREADS=2
    log "Limited CPU cores - adjusted thread count"
  fi
}
# Wait for service readiness
# Launch `ollama serve` in the background and poll until its TCP port
# answers. Returns 0 when ready; returns 1 if the server process dies
# during startup or the port never opens (child is killed on timeout).
# The background job is left running for the caller's later `wait`.
wait_for_service() {
  log "Starting Ollama server..."
  ollama serve &
  local pid=$!

  # Poll every 2s, 30 attempts => up to 60 seconds total.
  local attempt
  for attempt in {1..30}; do
    # Bail out early if the server already exited, rather than polling
    # a port that will never open for the remainder of the minute.
    if ! kill -0 "$pid" 2>/dev/null; then
      error "Ollama server process exited during startup"
      return 1
    fi
    if nc -z localhost 7860 2>/dev/null; then
      log "β Ollama service ready on port 7860"
      return 0
    fi
    sleep 2
  done

  error "Service failed to start within 60 seconds"
  kill "$pid" 2>/dev/null || true
  return 1
}
# Model management
# Optionally pre-pull the model named by $PRELOAD_MODEL and warm it up.
# On pull failure, tries small quantized fallbacks and exports
# DEFAULT_MODEL to the first one that succeeds. Always returns 0 so a
# failed preload never aborts startup.
setup_model() {
  local requested="${PRELOAD_MODEL:-}"

  # Nothing requested: model loading stays on-demand.
  if [[ -z "$requested" ]]; then
    log "No model preloading specified (set PRELOAD_MODEL env var)"
    return 0
  fi

  log "Attempting to preload model: $requested"

  # Give the pull five minutes before treating it as failed.
  if timeout 300 ollama pull "$requested" 2>/dev/null; then
    log "β Model $requested loaded successfully"
    # One throwaway inference so the first real request is not cold.
    echo "test" | timeout 15 ollama run "$requested" >/dev/null 2>&1 || true
    return 0
  fi

  log "β Failed to preload $requested - will load on demand"

  # Walk the lightweight alternatives, stopping at the first success.
  local fallback
  for fallback in "gemma:2b-instruct-q4_0" "phi:2.7b-chat-v0.2-q4_0"; do
    log "Trying fallback: $fallback"
    if timeout 180 ollama pull "$fallback" 2>/dev/null; then
      log "β Fallback model $fallback loaded"
      export DEFAULT_MODEL="$fallback"
      break
    fi
  done
}
# Signal handling
# SIGTERM/SIGINT handler: stop any running Ollama server and exit with
# status 0 so the container shuts down cleanly.
cleanup() {
log "Shutting down gracefully..."
# pkill may match nothing if the server never started; ignore that.
pkill -f "ollama serve" 2>/dev/null || true
exit 0
}
# Route container stop signals through the graceful-shutdown path above.
trap cleanup SIGTERM SIGINT
# Main execution
# Entry point: tune for the host, start the server, optionally preload a
# model, then block on the background ollama process to keep the
# container alive.
main() {
  log "Starting Ollama with CPU optimizations"
  # The tuning vars are optional runtime config; default them in the log
  # line so `set -u` does not abort the script when they are not exported.
  log "Config: PARALLEL=${OLLAMA_NUM_PARALLEL:-unset}, QUEUE=${OLLAMA_MAX_QUEUE:-unset}, KEEP_ALIVE=${OLLAMA_KEEP_ALIVE:-unset}"
  check_system
  if wait_for_service; then
    setup_model
    log "π Ollama ready at http://localhost:7860"
    log "Send requests to /api/generate or /api/chat endpoints"
    # Keep container alive by waiting on the backgrounded server.
    wait
  else
    error "Failed to initialize Ollama"
    exit 1
  fi
}
main "$@"